OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" | |
6 | |
7 #include <map> | |
8 #include <utility> | |
9 | |
10 #include "base/stl_util-inl.h" | |
11 #include "base/task.h" | |
12 #include "base/time.h" | |
13 #include "chrome/common/safe_browsing/csd.pb.h" | |
14 #include "chrome/browser/history/history.h" | |
15 #include "chrome/browser/history/history_types.h" | |
16 #include "chrome/browser/profiles/profile.h" | |
17 #include "content/common/page_transition_types.h" | |
18 #include "content/browser/browser_thread.h" | |
19 #include "content/browser/cancelable_request.h" | |
20 #include "content/browser/tab_contents/tab_contents.h" | |
21 #include "googleurl/src/gurl.h" | |
22 | |
23 namespace safe_browsing { | |
namespace features {
// Names of the browser-side features computed in this file.  Each string is
// used verbatim as the name of an entry in the request's non-model feature
// map.

// Features derived from the history entry of the exact URL.
// Visit count as reported by the URL's history row.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
// Number of main-frame visits whose transition type is TYPED.
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
// Number of main-frame visits whose transition type is LINK.
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
// Number of main-frame visits that are older than one day.
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";

// Features derived from the visits to the URL's host, queried once for the
// http URL and once for the same URL with the scheme replaced by https.
// Visit count returned by the host query.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
// 1.0 if the first visit reported by the host query is older than one day,
// 0.0 otherwise.
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
37 | |
38 static void AddFeature(const string& feature_name, | |
39 double feature_value, | |
40 ClientPhishingRequest* request) { | |
41 DCHECK(request); | |
42 ClientPhishingRequest::Feature* feature = | |
43 request->add_non_model_feature_map(); | |
44 feature->set_name(feature_name); | |
45 feature->set_value(feature_value); | |
46 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); | |
47 } | |
48 | |
49 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab) | |
50 : tab_(tab), | |
51 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { | |
52 DCHECK(tab); | |
53 } | |
54 | |
55 BrowserFeatureExtractor::~BrowserFeatureExtractor() { | |
56 method_factory_.RevokeAll(); | |
57 // Delete all the pending extractions (delete callback and request objects). | |
58 STLDeleteContainerPairPointers(pending_extractions_.begin(), | |
59 pending_extractions_.end()); | |
60 // Also cancel all the pending history service queries. | |
61 HistoryService* history; | |
62 DCHECK(GetHistoryService(&history) || pending_queries_.size() == 0); | |
Brian Ryner
2011/06/10 04:45:10
This won't work in release builds because GetHisto
noelutz
2011/06/14 01:10:10
Good catch. Done.
| |
63 // Cancel all the pending history lookups and cleanup the memory. | |
64 for (PendingQueriesMap::iterator it = pending_queries_.begin(); | |
65 it != pending_queries_.end(); ++it) { | |
66 if (history) { | |
67 history->CancelRequest(it->first); | |
68 } | |
69 ExtractionData& extraction = it->second; | |
70 delete extraction.first; // delete request | |
71 delete extraction.second; // delete callback | |
72 } | |
73 pending_queries_.clear(); | |
74 } | |
75 | |
76 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request, | |
77 DoneCallback* callback) { | |
78 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
79 DCHECK(request); | |
80 DCHECK(request->url().find("http:") == 0); | |
Brian Ryner
2011/06/10 04:45:10
DCHECK_EQ
noelutz
2011/06/14 01:10:10
Done.
| |
81 DCHECK(callback); | |
82 if (!callback) { | |
83 DLOG(ERROR) << "ExtractFeatures called without a callback object"; | |
84 return; | |
85 } | |
86 pending_extractions_.insert(std::make_pair(request, callback)); | |
87 MessageLoop::current()->PostTask( | |
88 FROM_HERE, | |
89 method_factory_.NewRunnableMethod( | |
90 &BrowserFeatureExtractor::StartExtractFeatures, | |
91 request, callback)); | |
92 } | |
93 | |
94 void BrowserFeatureExtractor::StartExtractFeatures( | |
95 ClientPhishingRequest* request, | |
96 DoneCallback* callback) { | |
97 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
98 ExtractionData extraction = std::make_pair(request, callback); | |
99 DCHECK_EQ(1U, pending_extractions_.erase(extraction)); | |
Brian Ryner
2011/06/10 04:45:10
Ditto here, the erase will not happen in release b
noelutz
2011/06/14 01:10:10
Done.
| |
100 HistoryService* history; | |
101 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { | |
102 callback->Run(false, request); | |
103 return; | |
104 } | |
105 CancelableRequestProvider::Handle handle = history->QueryURL( | |
106 GURL(request->url()), | |
107 true /* wants_visits */, | |
108 &request_consumer_, | |
109 NewCallback(this, | |
110 &BrowserFeatureExtractor::QueryUrlHistoryDone)); | |
111 | |
112 StorePendingQuery(handle, request, callback); | |
113 } | |
114 | |
115 void BrowserFeatureExtractor::QueryUrlHistoryDone( | |
116 CancelableRequestProvider::Handle handle, | |
117 bool success, | |
118 const history::URLRow* row, | |
119 history::VisitVector* visits) { | |
120 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
121 ClientPhishingRequest* request; | |
122 DoneCallback* callback; | |
123 if (!GetPendingQuery(handle, &request, &callback)) { | |
124 DLOG(FATAL) << "No pending history query found"; | |
125 return; | |
126 } | |
127 DCHECK(request); | |
128 DCHECK(callback); | |
129 if (!success) { | |
130 // URL is not found in the history. In practice this should not | |
131 // happen (unless there is a real error) because we just visited | |
132 // that URL. | |
133 callback->Run(false, request); | |
134 return; | |
135 } | |
136 AddFeature(features::kUrlHistoryVisitCount, | |
137 static_cast<double>(row->visit_count()), | |
138 request); | |
139 | |
140 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); | |
141 int num_visits_24h_ago = 0; | |
142 int num_visits_typed = 0; | |
143 int num_visits_link = 0; | |
144 for (history::VisitVector::const_iterator it = visits->begin(); | |
145 it != visits->end(); ++it) { | |
146 if (!PageTransition::IsMainFrame(it->transition)) { | |
147 continue; | |
148 } | |
149 if (it->visit_time < threshold) { | |
150 ++num_visits_24h_ago; | |
151 } | |
152 PageTransition::Type transition = PageTransition::StripQualifier( | |
153 it->transition); | |
154 if (transition == PageTransition::TYPED) { | |
155 ++num_visits_typed; | |
156 } else if (transition == PageTransition::LINK) { | |
157 ++num_visits_link; | |
158 } | |
159 } | |
160 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo, | |
161 static_cast<double>(num_visits_24h_ago), | |
162 request); | |
163 AddFeature(features::kUrlHistoryTypedCount, | |
164 static_cast<double>(num_visits_typed), | |
165 request); | |
166 AddFeature(features::kUrlHistoryLinkCount, | |
167 static_cast<double>(num_visits_link), | |
168 request); | |
169 | |
170 // Issue next history lookup for host visits. | |
171 HistoryService* history; | |
172 if (!GetHistoryService(&history)) { | |
173 callback->Run(false, request); | |
174 return; | |
175 } | |
176 CancelableRequestProvider::Handle next_handle = | |
177 history->GetVisibleVisitCountToHost( | |
178 GURL(request->url()), | |
179 &request_consumer_, | |
180 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone)); | |
181 StorePendingQuery(next_handle, request, callback); | |
182 } | |
183 | |
184 void BrowserFeatureExtractor::QueryHttpHostVisitsDone( | |
185 CancelableRequestProvider::Handle handle, | |
186 bool success, | |
187 int num_visits, | |
188 base::Time first_visit) { | |
189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
190 ClientPhishingRequest* request; | |
191 DoneCallback* callback; | |
192 if (!GetPendingQuery(handle, &request, &callback)) { | |
193 DLOG(FATAL) << "No pending history query found"; | |
194 return; | |
195 } | |
196 DCHECK(request); | |
197 DCHECK(callback); | |
198 if (!success) { | |
199 callback->Run(false, request); | |
200 return; | |
201 } | |
202 SetHostVisitsFeatures(num_visits, first_visit, true, request); | |
203 | |
204 // Same lookup but for the HTTPS URL. | |
205 HistoryService* history; | |
206 if (!GetHistoryService(&history)) { | |
207 callback->Run(false, request); | |
208 return; | |
209 } | |
210 std::string https_url = request->url(); | |
211 CancelableRequestProvider::Handle next_handle = | |
212 history->GetVisibleVisitCountToHost( | |
213 GURL(https_url.replace(0, 5, "https:")), | |
214 &request_consumer_, | |
215 NewCallback(this, | |
216 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone)); | |
217 StorePendingQuery(next_handle, request, callback); | |
218 } | |
219 | |
220 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( | |
221 CancelableRequestProvider::Handle handle, | |
222 bool success, | |
223 int num_visits, | |
224 base::Time first_visit) { | |
225 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
226 ClientPhishingRequest* request; | |
227 DoneCallback* callback; | |
228 if (!GetPendingQuery(handle, &request, &callback)) { | |
229 DLOG(FATAL) << "No pending history query found"; | |
230 return; | |
231 } | |
232 DCHECK(request); | |
233 DCHECK(callback); | |
234 if (!success) { | |
235 callback->Run(false, request); | |
236 return; | |
237 } | |
238 SetHostVisitsFeatures(num_visits, first_visit, false, request); | |
239 callback->Run(true, request); // We're done with all the history lookups. | |
240 } | |
241 | |
242 void BrowserFeatureExtractor::SetHostVisitsFeatures( | |
243 int num_visits, | |
244 base::Time first_visit, | |
245 bool is_http_query, | |
246 ClientPhishingRequest* request) { | |
247 DCHECK(request); | |
248 AddFeature(is_http_query ? | |
249 features::kHttpHostVisitCount : features::kHttpsHostVisitCount, | |
250 static_cast<double>(num_visits), | |
251 request); | |
252 AddFeature( | |
253 is_http_query ? | |
254 features::kFirstHttpHostVisitMoreThan24hAgo : | |
255 features::kFirstHttpsHostVisitMoreThan24hAgo, | |
256 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ? | |
257 1.0 : 0.0, | |
258 request); | |
259 } | |
260 | |
261 void BrowserFeatureExtractor::StorePendingQuery( | |
262 CancelableRequestProvider::Handle handle, | |
263 ClientPhishingRequest* request, | |
264 DoneCallback* callback) { | |
265 DCHECK(0 == pending_queries_.count(handle)); | |
Brian Ryner
2011/06/10 04:45:10
DCHECK_EQ
noelutz
2011/06/14 01:10:10
Done.
| |
266 pending_queries_[handle] = std::make_pair(request, callback); | |
267 } | |
268 | |
269 bool BrowserFeatureExtractor::GetPendingQuery( | |
270 CancelableRequestProvider::Handle handle, | |
271 ClientPhishingRequest** request, | |
272 DoneCallback** callback) { | |
273 PendingQueriesMap::iterator it = pending_queries_.find(handle); | |
274 DCHECK(it != pending_queries_.end()); | |
275 if (it != pending_queries_.end()) { | |
276 *request = it->second.first; | |
277 *callback = it->second.second; | |
278 pending_queries_.erase(it); | |
279 return true; | |
280 } | |
281 return false; | |
282 } | |
283 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { | |
284 *history = NULL; | |
285 if (tab_ && tab_->profile()) { | |
286 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
Brian Ryner
2011/06/10 04:45:10
I'm not totally clear on whether EXPLICIT or IMPLI
noelutz
2011/06/14 01:10:10
Only EXPLICIT_ACCESS does something. See the prof
Brian Ryner
2011/06/14 01:25:15
In incognito mode (OffTheRecordProfileImpl), that'
| |
287 if (*history) { | |
288 return true; | |
289 } | |
290 } | |
291 VLOG(2) << "Unable to query history. No history service available."; | |
292 return false; | |
293 } | |
294 }; // namespace safe_browsing | |
OLD | NEW |