Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7172018: Revert 89178 - Create a browser feature extractor that runs after the renderer has (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/page_transition_types.h"
18 #include "content/browser/browser_thread.h"
19 #include "content/browser/cancelable_request.h"
20 #include "content/browser/tab_contents/tab_contents.h"
21 #include "googleurl/src/gurl.h"
22
23 namespace safe_browsing {
namespace features {

// Names of the browser-side features that this file attaches to a
// ClientPhishingRequest through AddFeature().

// Features computed from the history entry of the request URL itself.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";

// Features computed from host-level visit counts, once for the HTTP form of
// the URL and once for the HTTPS form.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";

}  // namespace features
37
38 static void AddFeature(const std::string& feature_name,
39 double feature_value,
40 ClientPhishingRequest* request) {
41 DCHECK(request);
42 ClientPhishingRequest::Feature* feature =
43 request->add_non_model_feature_map();
44 feature->set_name(feature_name);
45 feature->set_value(feature_value);
46 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
47 }
48
49 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
50 : tab_(tab),
51 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
52 DCHECK(tab);
53 }
54
55 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
56 method_factory_.RevokeAll();
57 // Delete all the pending extractions (delete callback and request objects).
58 STLDeleteContainerPairPointers(pending_extractions_.begin(),
59 pending_extractions_.end());
60 // Also cancel all the pending history service queries.
61 HistoryService* history;
62 bool success = GetHistoryService(&history);
63 DCHECK(success || pending_queries_.size() == 0);
64 // Cancel all the pending history lookups and cleanup the memory.
65 for (PendingQueriesMap::iterator it = pending_queries_.begin();
66 it != pending_queries_.end(); ++it) {
67 if (history) {
68 history->CancelRequest(it->first);
69 }
70 ExtractionData& extraction = it->second;
71 delete extraction.first; // delete request
72 delete extraction.second; // delete callback
73 }
74 pending_queries_.clear();
75 }
76
77 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
78 DoneCallback* callback) {
79 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
80 DCHECK(request);
81 DCHECK_EQ(0U, request->url().find("http:"));
82 DCHECK(callback);
83 if (!callback) {
84 DLOG(ERROR) << "ExtractFeatures called without a callback object";
85 return;
86 }
87 pending_extractions_.insert(std::make_pair(request, callback));
88 MessageLoop::current()->PostTask(
89 FROM_HERE,
90 method_factory_.NewRunnableMethod(
91 &BrowserFeatureExtractor::StartExtractFeatures,
92 request, callback));
93 }
94
95 void BrowserFeatureExtractor::StartExtractFeatures(
96 ClientPhishingRequest* request,
97 DoneCallback* callback) {
98 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
99 ExtractionData extraction = std::make_pair(request, callback);
100 size_t removed = pending_extractions_.erase(extraction);
101 DCHECK_EQ(1U, removed);
102 HistoryService* history;
103 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
104 callback->Run(false, request);
105 return;
106 }
107 CancelableRequestProvider::Handle handle = history->QueryURL(
108 GURL(request->url()),
109 true /* wants_visits */,
110 &request_consumer_,
111 NewCallback(this,
112 &BrowserFeatureExtractor::QueryUrlHistoryDone));
113
114 StorePendingQuery(handle, request, callback);
115 }
116
117 void BrowserFeatureExtractor::QueryUrlHistoryDone(
118 CancelableRequestProvider::Handle handle,
119 bool success,
120 const history::URLRow* row,
121 history::VisitVector* visits) {
122 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
123 ClientPhishingRequest* request;
124 DoneCallback* callback;
125 if (!GetPendingQuery(handle, &request, &callback)) {
126 DLOG(FATAL) << "No pending history query found";
127 return;
128 }
129 DCHECK(request);
130 DCHECK(callback);
131 if (!success) {
132 // URL is not found in the history. In practice this should not
133 // happen (unless there is a real error) because we just visited
134 // that URL.
135 callback->Run(false, request);
136 return;
137 }
138 AddFeature(features::kUrlHistoryVisitCount,
139 static_cast<double>(row->visit_count()),
140 request);
141
142 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
143 int num_visits_24h_ago = 0;
144 int num_visits_typed = 0;
145 int num_visits_link = 0;
146 for (history::VisitVector::const_iterator it = visits->begin();
147 it != visits->end(); ++it) {
148 if (!PageTransition::IsMainFrame(it->transition)) {
149 continue;
150 }
151 if (it->visit_time < threshold) {
152 ++num_visits_24h_ago;
153 }
154 PageTransition::Type transition = PageTransition::StripQualifier(
155 it->transition);
156 if (transition == PageTransition::TYPED) {
157 ++num_visits_typed;
158 } else if (transition == PageTransition::LINK) {
159 ++num_visits_link;
160 }
161 }
162 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
163 static_cast<double>(num_visits_24h_ago),
164 request);
165 AddFeature(features::kUrlHistoryTypedCount,
166 static_cast<double>(num_visits_typed),
167 request);
168 AddFeature(features::kUrlHistoryLinkCount,
169 static_cast<double>(num_visits_link),
170 request);
171
172 // Issue next history lookup for host visits.
173 HistoryService* history;
174 if (!GetHistoryService(&history)) {
175 callback->Run(false, request);
176 return;
177 }
178 CancelableRequestProvider::Handle next_handle =
179 history->GetVisibleVisitCountToHost(
180 GURL(request->url()),
181 &request_consumer_,
182 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
183 StorePendingQuery(next_handle, request, callback);
184 }
185
186 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
187 CancelableRequestProvider::Handle handle,
188 bool success,
189 int num_visits,
190 base::Time first_visit) {
191 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
192 ClientPhishingRequest* request;
193 DoneCallback* callback;
194 if (!GetPendingQuery(handle, &request, &callback)) {
195 DLOG(FATAL) << "No pending history query found";
196 return;
197 }
198 DCHECK(request);
199 DCHECK(callback);
200 if (!success) {
201 callback->Run(false, request);
202 return;
203 }
204 SetHostVisitsFeatures(num_visits, first_visit, true, request);
205
206 // Same lookup but for the HTTPS URL.
207 HistoryService* history;
208 if (!GetHistoryService(&history)) {
209 callback->Run(false, request);
210 return;
211 }
212 std::string https_url = request->url();
213 CancelableRequestProvider::Handle next_handle =
214 history->GetVisibleVisitCountToHost(
215 GURL(https_url.replace(0, 5, "https:")),
216 &request_consumer_,
217 NewCallback(this,
218 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
219 StorePendingQuery(next_handle, request, callback);
220 }
221
222 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
223 CancelableRequestProvider::Handle handle,
224 bool success,
225 int num_visits,
226 base::Time first_visit) {
227 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
228 ClientPhishingRequest* request;
229 DoneCallback* callback;
230 if (!GetPendingQuery(handle, &request, &callback)) {
231 DLOG(FATAL) << "No pending history query found";
232 return;
233 }
234 DCHECK(request);
235 DCHECK(callback);
236 if (!success) {
237 callback->Run(false, request);
238 return;
239 }
240 SetHostVisitsFeatures(num_visits, first_visit, false, request);
241 callback->Run(true, request); // We're done with all the history lookups.
242 }
243
244 void BrowserFeatureExtractor::SetHostVisitsFeatures(
245 int num_visits,
246 base::Time first_visit,
247 bool is_http_query,
248 ClientPhishingRequest* request) {
249 DCHECK(request);
250 AddFeature(is_http_query ?
251 features::kHttpHostVisitCount : features::kHttpsHostVisitCount,
252 static_cast<double>(num_visits),
253 request);
254 AddFeature(
255 is_http_query ?
256 features::kFirstHttpHostVisitMoreThan24hAgo :
257 features::kFirstHttpsHostVisitMoreThan24hAgo,
258 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ?
259 1.0 : 0.0,
260 request);
261 }
262
263 void BrowserFeatureExtractor::StorePendingQuery(
264 CancelableRequestProvider::Handle handle,
265 ClientPhishingRequest* request,
266 DoneCallback* callback) {
267 DCHECK_EQ(0U, pending_queries_.count(handle));
268 pending_queries_[handle] = std::make_pair(request, callback);
269 }
270
271 bool BrowserFeatureExtractor::GetPendingQuery(
272 CancelableRequestProvider::Handle handle,
273 ClientPhishingRequest** request,
274 DoneCallback** callback) {
275 PendingQueriesMap::iterator it = pending_queries_.find(handle);
276 DCHECK(it != pending_queries_.end());
277 if (it != pending_queries_.end()) {
278 *request = it->second.first;
279 *callback = it->second.second;
280 pending_queries_.erase(it);
281 return true;
282 }
283 return false;
284 }
285 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
286 *history = NULL;
287 if (tab_ && tab_->profile()) {
288 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
289 if (*history) {
290 return true;
291 }
292 }
293 VLOG(2) << "Unable to query history. No history service available.";
294 return false;
295 }
296 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698