Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1467)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Add missing delete Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/page_transition_types.h"
18 #include "content/browser/browser_thread.h"
19 #include "content/browser/cancelable_request.h"
20 #include "content/browser/tab_contents/navigation_controller.h"
mattm 2011/06/15 23:22:39 unused now?
noelutz 2011/06/16 17:01:39 Done.
21 #include "content/browser/tab_contents/tab_contents.h"
22 #include "googleurl/src/gurl.h"
23
24 namespace safe_browsing {
namespace features {
// Names under which the browser-side features are stored in the request's
// non-model feature map.

// Features computed from the history entry of the exact URL.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";

// Features computed from the host-level history lookup, HTTP variant.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";

// Features computed from the host-level history lookup, HTTPS variant.
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
38
39 static void AddFeature(const std::string& feature_name,
40 double feature_value,
41 ClientPhishingRequest* request) {
42 DCHECK(request);
43 ClientPhishingRequest::Feature* feature =
44 request->add_non_model_feature_map();
45 feature->set_name(feature_name);
46 feature->set_value(feature_value);
47 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
48 }
49
50 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
51 : tab_(tab),
52 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
53 DCHECK(tab);
54 }
55
56 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
57 method_factory_.RevokeAll();
58 // Delete all the pending extractions (delete callback and request objects).
59 STLDeleteContainerPairPointers(pending_extractions_.begin(),
60 pending_extractions_.end());
61 // Also cancel all the pending history service queries.
62 HistoryService* history;
63 bool success = GetHistoryService(&history);
64 DCHECK(success || pending_queries_.size() == 0);
65 // Cancel all the pending history lookups and cleanup the memory.
66 for (PendingQueriesMap::iterator it = pending_queries_.begin();
67 it != pending_queries_.end(); ++it) {
68 if (history) {
69 history->CancelRequest(it->first);
70 }
71 ExtractionData& extraction = it->second;
72 delete extraction.first; // delete request
73 delete extraction.second; // delete callback
74 }
75 pending_queries_.clear();
76 }
77
78 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
79 DoneCallback* callback) {
80 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
81 DCHECK(request);
82 DCHECK_EQ(0U, request->url().find("http:"));
83 DCHECK(callback);
84 if (!callback) {
85 DLOG(ERROR) << "ExtractFeatures called without a callback object";
86 return;
87 }
88 pending_extractions_.insert(std::make_pair(request, callback));
89 MessageLoop::current()->PostTask(
90 FROM_HERE,
91 method_factory_.NewRunnableMethod(
92 &BrowserFeatureExtractor::StartExtractFeatures,
93 request, callback));
94 }
95
96 void BrowserFeatureExtractor::StartExtractFeatures(
97 ClientPhishingRequest* request,
98 DoneCallback* callback) {
99 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
100 ExtractionData extraction = std::make_pair(request, callback);
101 size_t removed = pending_extractions_.erase(extraction);
102 DCHECK_EQ(1U, removed);
103 HistoryService* history;
104 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
105 callback->Run(false, request);
106 delete callback;
107 return;
108 }
109 CancelableRequestProvider::Handle handle = history->QueryURL(
110 GURL(request->url()),
111 true /* wants_visits */,
112 &request_consumer_,
113 NewCallback(this,
114 &BrowserFeatureExtractor::QueryUrlHistoryDone));
115
116 StorePendingQuery(handle, request, callback);
117 }
118
119 void BrowserFeatureExtractor::QueryUrlHistoryDone(
120 CancelableRequestProvider::Handle handle,
121 bool success,
122 const history::URLRow* row,
123 history::VisitVector* visits) {
124 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
125 ClientPhishingRequest* request;
126 DoneCallback* callback;
127 if (!GetPendingQuery(handle, &request, &callback)) {
128 DLOG(FATAL) << "No pending history query found";
129 return;
130 }
131 DCHECK(request);
132 DCHECK(callback);
133 if (!success) {
134 // URL is not found in the history. In practice this should not
135 // happen (unless there is a real error) because we just visited
136 // that URL.
137 callback->Run(false, request);
138 delete callback;
139 return;
140 }
141 AddFeature(features::kUrlHistoryVisitCount,
142 static_cast<double>(row->visit_count()),
143 request);
144
145 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
146 int num_visits_24h_ago = 0;
147 int num_visits_typed = 0;
148 int num_visits_link = 0;
149 for (history::VisitVector::const_iterator it = visits->begin();
150 it != visits->end(); ++it) {
151 if (!PageTransition::IsMainFrame(it->transition)) {
152 continue;
153 }
154 if (it->visit_time < threshold) {
155 ++num_visits_24h_ago;
156 }
157 PageTransition::Type transition = PageTransition::StripQualifier(
158 it->transition);
159 if (transition == PageTransition::TYPED) {
160 ++num_visits_typed;
161 } else if (transition == PageTransition::LINK) {
162 ++num_visits_link;
163 }
164 }
165 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
166 static_cast<double>(num_visits_24h_ago),
167 request);
168 AddFeature(features::kUrlHistoryTypedCount,
169 static_cast<double>(num_visits_typed),
170 request);
171 AddFeature(features::kUrlHistoryLinkCount,
172 static_cast<double>(num_visits_link),
173 request);
174
175 // Issue next history lookup for host visits.
176 HistoryService* history;
177 if (!GetHistoryService(&history)) {
178 callback->Run(false, request);
179 delete callback;
180 return;
181 }
182 CancelableRequestProvider::Handle next_handle =
183 history->GetVisibleVisitCountToHost(
184 GURL(request->url()),
185 &request_consumer_,
186 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
187 StorePendingQuery(next_handle, request, callback);
188 }
189
190 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
191 CancelableRequestProvider::Handle handle,
192 bool success,
193 int num_visits,
194 base::Time first_visit) {
195 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
196 ClientPhishingRequest* request;
197 DoneCallback* callback;
198 if (!GetPendingQuery(handle, &request, &callback)) {
199 DLOG(FATAL) << "No pending history query found";
200 return;
201 }
202 DCHECK(request);
203 DCHECK(callback);
204 if (!success) {
205 callback->Run(false, request);
206 delete callback;
207 return;
208 }
209 SetHostVisitsFeatures(num_visits, first_visit, true, request);
210
211 // Same lookup but for the HTTPS URL.
212 HistoryService* history;
213 if (!GetHistoryService(&history)) {
214 callback->Run(false, request);
215 delete callback;
216 return;
217 }
218 std::string https_url = request->url();
219 CancelableRequestProvider::Handle next_handle =
220 history->GetVisibleVisitCountToHost(
221 GURL(https_url.replace(0, 5, "https:")),
222 &request_consumer_,
223 NewCallback(this,
224 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
225 StorePendingQuery(next_handle, request, callback);
226 }
227
228 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
229 CancelableRequestProvider::Handle handle,
230 bool success,
231 int num_visits,
232 base::Time first_visit) {
233 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
234 ClientPhishingRequest* request;
235 DoneCallback* callback;
236 if (!GetPendingQuery(handle, &request, &callback)) {
237 DLOG(FATAL) << "No pending history query found";
238 return;
239 }
240 DCHECK(request);
241 DCHECK(callback);
242 if (!success) {
243 callback->Run(false, request);
244 delete callback;
245 return;
246 }
247 SetHostVisitsFeatures(num_visits, first_visit, false, request);
248 callback->Run(true, request); // We're done with all the history lookups.
249 delete callback;
250 }
251
252 void BrowserFeatureExtractor::SetHostVisitsFeatures(
253 int num_visits,
254 base::Time first_visit,
255 bool is_http_query,
256 ClientPhishingRequest* request) {
257 DCHECK(request);
258 AddFeature(is_http_query ?
259 features::kHttpHostVisitCount : features::kHttpsHostVisitCount,
260 static_cast<double>(num_visits),
261 request);
262 AddFeature(
263 is_http_query ?
264 features::kFirstHttpHostVisitMoreThan24hAgo :
265 features::kFirstHttpsHostVisitMoreThan24hAgo,
266 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ?
267 1.0 : 0.0,
268 request);
269 }
270
271 void BrowserFeatureExtractor::StorePendingQuery(
272 CancelableRequestProvider::Handle handle,
273 ClientPhishingRequest* request,
274 DoneCallback* callback) {
275 DCHECK_EQ(0U, pending_queries_.count(handle));
276 pending_queries_[handle] = std::make_pair(request, callback);
277 }
278
279 bool BrowserFeatureExtractor::GetPendingQuery(
280 CancelableRequestProvider::Handle handle,
281 ClientPhishingRequest** request,
282 DoneCallback** callback) {
283 PendingQueriesMap::iterator it = pending_queries_.find(handle);
284 DCHECK(it != pending_queries_.end());
285 if (it != pending_queries_.end()) {
286 *request = it->second.first;
287 *callback = it->second.second;
288 pending_queries_.erase(it);
289 return true;
290 }
291 return false;
292 }
293
294 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
295 *history = NULL;
296 if (tab_ && tab_->profile()) {
297 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
298 if (*history) {
299 return true;
300 }
301 }
302 VLOG(2) << "Unable to query history. No history service available.";
303 return false;
304 }
305 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698