Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(112)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Address Garrett's comments. Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/page_transition_types.h"
18 #include "content/browser/browser_thread.h"
19 #include "content/browser/cancelable_request.h"
20 #include "content/browser/tab_contents/tab_contents.h"
21 #include "googleurl/src/gurl.h"
22
23 namespace safe_browsing {
namespace features {

// Names under which the browser-side features are recorded in a
// ClientPhishingRequest. The strings are passed verbatim as feature names.

// Features computed from the visits to the exact URL.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";

// Features computed from the visits to the URL's host, over HTTP and HTTPS.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";

}  // namespace features
37
38 static void AddFeature(const string& feature_name,
39 double feature_value,
40 ClientPhishingRequest* request) {
41 DCHECK(request);
42 ClientPhishingRequest::Feature* feature =
43 request->add_non_model_feature_map();
44 feature->set_name(feature_name);
45 feature->set_value(feature_value);
46 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
47 }
48
49 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
50 : tab_(tab),
51 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
52 DCHECK(tab);
53 }
54
55 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
56 method_factory_.RevokeAll();
57 // Delete all the pending extractions (delete callback and request objects).
58 STLDeleteContainerPairPointers(pending_extractions_.begin(),
59 pending_extractions_.end());
60 // Also cancel all the pending history service queries.
61 HistoryService* history;
62 DCHECK(GetHistoryService(&history) || pending_queries_.size() == 0);
Brian Ryner 2011/06/10 04:45:10 This won't work in release builds because GetHisto
noelutz 2011/06/14 01:10:10 Good catch. Done.
63 // Cancel all the pending history lookups and cleanup the memory.
64 for (PendingQueriesMap::iterator it = pending_queries_.begin();
65 it != pending_queries_.end(); ++it) {
66 if (history) {
67 history->CancelRequest(it->first);
68 }
69 ExtractionData& extraction = it->second;
70 delete extraction.first; // delete request
71 delete extraction.second; // delete callback
72 }
73 pending_queries_.clear();
74 }
75
76 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
77 DoneCallback* callback) {
78 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
79 DCHECK(request);
80 DCHECK(request->url().find("http:") == 0);
Brian Ryner 2011/06/10 04:45:10 DCHECK_EQ
noelutz 2011/06/14 01:10:10 Done.
81 DCHECK(callback);
82 if (!callback) {
83 DLOG(ERROR) << "ExtractFeatures called without a callback object";
84 return;
85 }
86 pending_extractions_.insert(std::make_pair(request, callback));
87 MessageLoop::current()->PostTask(
88 FROM_HERE,
89 method_factory_.NewRunnableMethod(
90 &BrowserFeatureExtractor::StartExtractFeatures,
91 request, callback));
92 }
93
94 void BrowserFeatureExtractor::StartExtractFeatures(
95 ClientPhishingRequest* request,
96 DoneCallback* callback) {
97 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
98 ExtractionData extraction = std::make_pair(request, callback);
99 DCHECK_EQ(1U, pending_extractions_.erase(extraction));
Brian Ryner 2011/06/10 04:45:10 Ditto here, the erase will not happen in release b
noelutz 2011/06/14 01:10:10 Done.
100 HistoryService* history;
101 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
102 callback->Run(false, request);
103 return;
104 }
105 CancelableRequestProvider::Handle handle = history->QueryURL(
106 GURL(request->url()),
107 true /* wants_visits */,
108 &request_consumer_,
109 NewCallback(this,
110 &BrowserFeatureExtractor::QueryUrlHistoryDone));
111
112 StorePendingQuery(handle, request, callback);
113 }
114
115 void BrowserFeatureExtractor::QueryUrlHistoryDone(
116 CancelableRequestProvider::Handle handle,
117 bool success,
118 const history::URLRow* row,
119 history::VisitVector* visits) {
120 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
121 ClientPhishingRequest* request;
122 DoneCallback* callback;
123 if (!GetPendingQuery(handle, &request, &callback)) {
124 DLOG(FATAL) << "No pending history query found";
125 return;
126 }
127 DCHECK(request);
128 DCHECK(callback);
129 if (!success) {
130 // URL is not found in the history. In practice this should not
131 // happen (unless there is a real error) because we just visited
132 // that URL.
133 callback->Run(false, request);
134 return;
135 }
136 AddFeature(features::kUrlHistoryVisitCount,
137 static_cast<double>(row->visit_count()),
138 request);
139
140 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
141 int num_visits_24h_ago = 0;
142 int num_visits_typed = 0;
143 int num_visits_link = 0;
144 for (history::VisitVector::const_iterator it = visits->begin();
145 it != visits->end(); ++it) {
146 if (!PageTransition::IsMainFrame(it->transition)) {
147 continue;
148 }
149 if (it->visit_time < threshold) {
150 ++num_visits_24h_ago;
151 }
152 PageTransition::Type transition = PageTransition::StripQualifier(
153 it->transition);
154 if (transition == PageTransition::TYPED) {
155 ++num_visits_typed;
156 } else if (transition == PageTransition::LINK) {
157 ++num_visits_link;
158 }
159 }
160 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
161 static_cast<double>(num_visits_24h_ago),
162 request);
163 AddFeature(features::kUrlHistoryTypedCount,
164 static_cast<double>(num_visits_typed),
165 request);
166 AddFeature(features::kUrlHistoryLinkCount,
167 static_cast<double>(num_visits_link),
168 request);
169
170 // Issue next history lookup for host visits.
171 HistoryService* history;
172 if (!GetHistoryService(&history)) {
173 callback->Run(false, request);
174 return;
175 }
176 CancelableRequestProvider::Handle next_handle =
177 history->GetVisibleVisitCountToHost(
178 GURL(request->url()),
179 &request_consumer_,
180 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
181 StorePendingQuery(next_handle, request, callback);
182 }
183
184 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
185 CancelableRequestProvider::Handle handle,
186 bool success,
187 int num_visits,
188 base::Time first_visit) {
189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
190 ClientPhishingRequest* request;
191 DoneCallback* callback;
192 if (!GetPendingQuery(handle, &request, &callback)) {
193 DLOG(FATAL) << "No pending history query found";
194 return;
195 }
196 DCHECK(request);
197 DCHECK(callback);
198 if (!success) {
199 callback->Run(false, request);
200 return;
201 }
202 SetHostVisitsFeatures(num_visits, first_visit, true, request);
203
204 // Same lookup but for the HTTPS URL.
205 HistoryService* history;
206 if (!GetHistoryService(&history)) {
207 callback->Run(false, request);
208 return;
209 }
210 std::string https_url = request->url();
211 CancelableRequestProvider::Handle next_handle =
212 history->GetVisibleVisitCountToHost(
213 GURL(https_url.replace(0, 5, "https:")),
214 &request_consumer_,
215 NewCallback(this,
216 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
217 StorePendingQuery(next_handle, request, callback);
218 }
219
220 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
221 CancelableRequestProvider::Handle handle,
222 bool success,
223 int num_visits,
224 base::Time first_visit) {
225 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
226 ClientPhishingRequest* request;
227 DoneCallback* callback;
228 if (!GetPendingQuery(handle, &request, &callback)) {
229 DLOG(FATAL) << "No pending history query found";
230 return;
231 }
232 DCHECK(request);
233 DCHECK(callback);
234 if (!success) {
235 callback->Run(false, request);
236 return;
237 }
238 SetHostVisitsFeatures(num_visits, first_visit, false, request);
239 callback->Run(true, request); // We're done with all the history lookups.
240 }
241
242 void BrowserFeatureExtractor::SetHostVisitsFeatures(
243 int num_visits,
244 base::Time first_visit,
245 bool is_http_query,
246 ClientPhishingRequest* request) {
247 DCHECK(request);
248 AddFeature(is_http_query ?
249 features::kHttpHostVisitCount : features::kHttpsHostVisitCount,
250 static_cast<double>(num_visits),
251 request);
252 AddFeature(
253 is_http_query ?
254 features::kFirstHttpHostVisitMoreThan24hAgo :
255 features::kFirstHttpsHostVisitMoreThan24hAgo,
256 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ?
257 1.0 : 0.0,
258 request);
259 }
260
261 void BrowserFeatureExtractor::StorePendingQuery(
262 CancelableRequestProvider::Handle handle,
263 ClientPhishingRequest* request,
264 DoneCallback* callback) {
265 DCHECK(0 == pending_queries_.count(handle));
Brian Ryner 2011/06/10 04:45:10 DCHECK_EQ
noelutz 2011/06/14 01:10:10 Done.
266 pending_queries_[handle] = std::make_pair(request, callback);
267 }
268
269 bool BrowserFeatureExtractor::GetPendingQuery(
270 CancelableRequestProvider::Handle handle,
271 ClientPhishingRequest** request,
272 DoneCallback** callback) {
273 PendingQueriesMap::iterator it = pending_queries_.find(handle);
274 DCHECK(it != pending_queries_.end());
275 if (it != pending_queries_.end()) {
276 *request = it->second.first;
277 *callback = it->second.second;
278 pending_queries_.erase(it);
279 return true;
280 }
281 return false;
282 }
283 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
284 *history = NULL;
285 if (tab_ && tab_->profile()) {
286 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
Brian Ryner 2011/06/10 04:45:10 I'm not totally clear on whether EXPLICIT or IMPLI
noelutz 2011/06/14 01:10:10 Only EXPLICIT_ACCESS does something. See the prof
Brian Ryner 2011/06/14 01:25:15 In incognito mode (OffTheRecordProfileImpl), that'
287 if (*history) {
288 return true;
289 }
290 }
291 VLOG(2) << "Unable to query history. No history service available.";
292 return false;
293 }
294 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698