OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" | |
6 | |
7 #include <map> | |
8 #include <utility> | |
9 | |
10 #include "base/stl_util-inl.h" | |
11 #include "base/task.h" | |
12 #include "base/time.h" | |
13 #include "chrome/common/safe_browsing/csd.pb.h" | |
14 #include "chrome/browser/history/history.h" | |
15 #include "chrome/browser/history/history_types.h" | |
16 #include "chrome/browser/profiles/profile.h" | |
17 #include "content/common/page_transition_types.h" | |
18 #include "content/browser/browser_thread.h" | |
19 #include "content/browser/cancelable_request.h" | |
20 #include "content/browser/tab_contents/tab_contents.h" | |
21 #include "googleurl/src/gurl.h" | |
22 | |
23 namespace safe_browsing { | |
namespace features {
// Names of the browser-side features that this file adds to a
// ClientPhishingRequest.  Each constant is the complete feature name: the
// value is stored separately through Feature::set_value(), so no name carries
// a trailing '=' separator.

// Feature names filled in from the history lookup of the request URL.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
const char kUrlHistoryVisitCount24hAgo[] = "UrlHistoryVisitCount24hAgo";

// Feature names filled in from the per-host visit lookups (HTTP and HTTPS).
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
36 | |
37 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab) | |
38 : tab_(tab), | |
39 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { | |
40 DCHECK(tab); | |
41 } | |
42 | |
43 BrowserFeatureExtractor::~BrowserFeatureExtractor() { | |
44 method_factory_.RevokeAll(); | |
45 // Delete all the pending extractions (delete callback and request objects). | |
46 STLDeleteContainerPairPointers(pending_extractions_.begin(), | |
47 pending_extractions_.end()); | |
48 // Also cancel all the pending history service queries. | |
49 HistoryService* history; | |
50 DCHECK(GetHistoryService(&history) || pending_queries_.size() == 0); | |
51 if (history) { | |
52 // Cancel all the pending history lookups and cleanup the memory. | |
53 for (PendingQueriesMap::iterator it = pending_queries_.begin(); | |
54 it != pending_queries_.end(); ++it) { | |
55 history->CancelRequest(it->first); | |
56 } | |
57 } | |
58 // Once we cancelled all the pending queries to the history service we also | |
59 // need to cleanup the request and callback objects. | |
60 for (PendingQueriesMap::iterator it = pending_queries_.begin(); | |
61 it != pending_queries_.end(); ++it) { | |
62 ExtractionData& extraction = it->second; | |
63 delete extraction.first; // delete request | |
64 delete extraction.second; // delete callback | |
65 } | |
66 } | |
67 | |
68 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request, | |
69 DoneCallback* callback) { | |
70 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
71 DCHECK(request); | |
72 DCHECK(request->url().find("http:") == 0); | |
73 DCHECK(callback); | |
74 if (!callback) { | |
75 DLOG(ERROR) << "ExtractFeatures called without a callback object"; | |
76 return; | |
77 } | |
78 pending_extractions_.insert(std::make_pair(request, callback)); | |
79 MessageLoop::current()->PostTask( | |
80 FROM_HERE, | |
81 method_factory_.NewRunnableMethod( | |
82 &BrowserFeatureExtractor::StartExtractFeatures, | |
83 request, callback)); | |
84 } | |
85 | |
86 void BrowserFeatureExtractor::StartExtractFeatures( | |
87 ClientPhishingRequest* request, | |
88 DoneCallback* callback) { | |
89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
90 ExtractionData extraction = std::make_pair(request, callback); | |
91 DCHECK(1 == pending_extractions_.erase(extraction)); | |
mattm
2011/06/08 23:18:18
DCHECK_EQ
noelutz
2011/06/09 19:21:39
Done.
| |
92 HistoryService* history; | |
93 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { | |
94 callback->Run(false, request); | |
95 return; | |
96 } | |
97 CancelableRequestProvider::Handle handle = history->QueryURL( | |
98 GURL(request->url()), | |
99 true /* wants_visits */, | |
100 &request_consumer_, | |
101 NewCallback(this, | |
102 &BrowserFeatureExtractor::QueryUrlHistoryDone)); | |
103 | |
104 StorePendingQuery(handle, request, callback); | |
105 } | |
106 | |
107 void BrowserFeatureExtractor::QueryUrlHistoryDone( | |
108 CancelableRequestProvider::Handle handle, | |
109 bool success, | |
110 const history::URLRow* row, | |
111 history::VisitVector* visits) { | |
112 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
113 ClientPhishingRequest* request; | |
114 DoneCallback* callback; | |
115 if (!GetPendingQuery(handle, &request, &callback)) { | |
116 DLOG(FATAL) << "No pending history query found"; | |
117 return; | |
118 } | |
119 DCHECK(request); | |
120 DCHECK(callback); | |
121 if (!success) { | |
122 // URL is not found in the history. In practice this should not | |
123 // happen (unless there is a real error) because we just visited | |
124 // that URL. | |
125 callback->Run(false, request); | |
126 return; | |
127 } | |
128 ClientPhishingRequest::Feature* feature = request->add_feature_map(); | |
129 feature->set_name(features::kUrlHistoryVisitCount); | |
130 feature->set_value(static_cast<double>(row->visit_count())); | |
131 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); | |
132 | |
133 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); | |
134 int num_visits_24h_ago = 0; | |
135 int num_visits_typed = 0; | |
136 int num_visits_link = 0; | |
137 for (history::VisitVector::const_iterator it = visits->begin(); | |
138 it != visits->end(); ++it) { | |
139 if (!PageTransition::IsMainFrame(it->transition)) { | |
140 continue; | |
141 } | |
142 if (it->visit_time < threshold) { | |
143 ++num_visits_24h_ago; | |
144 } | |
145 PageTransition::Type transition = PageTransition::StripQualifier( | |
146 it->transition); | |
147 if (transition == PageTransition::TYPED) { | |
148 ++num_visits_typed; | |
149 } else if (transition == PageTransition::LINK) { | |
150 ++num_visits_link; | |
151 } | |
152 } | |
153 feature = request->add_feature_map(); | |
154 feature->set_name(features::kUrlHistoryVisitCount24hAgo); | |
155 feature->set_value(static_cast<double>(num_visits_24h_ago)); | |
156 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); | |
157 | |
158 feature = request->add_feature_map(); | |
159 feature->set_name(features::kUrlHistoryTypedCount); | |
160 feature->set_value(static_cast<double>(num_visits_typed)); | |
161 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); | |
162 | |
163 feature = request->add_feature_map(); | |
164 feature->set_name(features::kUrlHistoryLinkCount); | |
165 feature->set_value(static_cast<double>(num_visits_link)); | |
166 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); | |
167 | |
168 // Issue next history lookup for hist visits. | |
169 HistoryService* history; | |
170 if (!GetHistoryService(&history)) { | |
171 callback->Run(false, request); | |
172 return; | |
173 } | |
174 CancelableRequestProvider::Handle next_handle = | |
175 history->GetVisibleVisitCountToHost( | |
176 GURL(request->url()), | |
177 &request_consumer_, | |
178 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone)); | |
179 StorePendingQuery(next_handle, request, callback); | |
180 } | |
181 | |
182 void BrowserFeatureExtractor::QueryHttpHostVisitsDone( | |
183 CancelableRequestProvider::Handle handle, | |
184 bool success, | |
185 int num_visits, | |
186 base::Time first_visit) { | |
187 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
188 ClientPhishingRequest* request; | |
189 DoneCallback* callback; | |
190 if (!GetPendingQuery(handle, &request, &callback)) { | |
191 DLOG(FATAL) << "No pending history query found"; | |
192 return; | |
193 } | |
194 DCHECK(request); | |
195 DCHECK(callback); | |
196 if (!success) { | |
197 callback->Run(false, request); | |
198 return; | |
199 } | |
200 SetHostVisitsFeatures(num_visits, first_visit, true, request); | |
201 | |
202 // Same lookup but for the HTTPS URL. | |
203 HistoryService* history; | |
204 if (!GetHistoryService(&history)) { | |
205 callback->Run(false, request); | |
206 return; | |
207 } | |
208 std::string https_url = request->url(); | |
209 CancelableRequestProvider::Handle next_handle = | |
210 history->GetVisibleVisitCountToHost( | |
211 GURL(https_url.replace(0, 5, "https:")), | |
212 &request_consumer_, | |
213 NewCallback(this, | |
214 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone)); | |
215 StorePendingQuery(next_handle, request, callback); | |
216 } | |
217 | |
218 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( | |
219 CancelableRequestProvider::Handle handle, | |
220 bool success, | |
221 int num_visits, | |
222 base::Time first_visit) { | |
223 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
224 ClientPhishingRequest* request; | |
225 DoneCallback* callback; | |
226 if (!GetPendingQuery(handle, &request, &callback)) { | |
227 DLOG(FATAL) << "No pending history query found"; | |
228 return; | |
229 } | |
230 DCHECK(request); | |
231 DCHECK(callback); | |
232 if (!success) { | |
233 callback->Run(false, request); | |
234 return; | |
235 } | |
236 SetHostVisitsFeatures(num_visits, first_visit, false, request); | |
237 callback->Run(true, request); // We're done with all the history lookups. | |
238 } | |
239 | |
240 void BrowserFeatureExtractor::SetHostVisitsFeatures( | |
241 int num_visits, | |
242 base::Time first_visit, | |
243 bool is_http_query, | |
244 ClientPhishingRequest* request) { | |
245 DCHECK(request); | |
246 ClientPhishingRequest::Feature* feature = request->add_feature_map(); | |
247 feature->set_name(is_http_query ? | |
248 features::kHttpHostVisitCount : | |
249 features::kHttpsHostVisitCount); | |
250 feature->set_value(static_cast<double>(num_visits)); | |
251 VLOG(2) << "Browser feature: " << feature->name() << " " | |
252 << feature->value(); | |
253 | |
254 feature = request->add_feature_map(); | |
255 feature->set_name(is_http_query ? | |
256 features::kFirstHttpHostVisitMoreThan24hAgo : | |
257 features::kFirstHttpsHostVisitMoreThan24hAgo); | |
258 if (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) { | |
259 feature->set_value(1.0); | |
260 } else { | |
261 feature->set_value(0.0); | |
262 } | |
263 VLOG(2) << "Browser feature: " << feature->name() << " " | |
264 << feature->value(); | |
265 } | |
266 | |
267 void BrowserFeatureExtractor::StorePendingQuery( | |
268 CancelableRequestProvider::Handle handle, | |
269 ClientPhishingRequest* request, | |
270 DoneCallback* callback) { | |
271 DCHECK(0 == pending_queries_.count(handle)); | |
272 pending_queries_[handle] = std::make_pair(request, callback); | |
273 } | |
274 | |
275 bool BrowserFeatureExtractor::GetPendingQuery( | |
276 CancelableRequestProvider::Handle handle, | |
277 ClientPhishingRequest** request, | |
278 DoneCallback** callback) { | |
279 PendingQueriesMap::iterator it = pending_queries_.find(handle); | |
280 DCHECK(it != pending_queries_.end()); | |
281 if (it != pending_queries_.end()) { | |
282 *request = it->second.first; | |
283 *callback = it->second.second; | |
284 pending_queries_.erase(it); | |
285 return true; | |
286 } | |
287 return false; | |
288 } | |
289 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { | |
290 *history = NULL; | |
291 if (tab_ && tab_->profile()) { | |
292 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
293 if (*history) { | |
294 return true; | |
295 } | |
296 } | |
297 VLOG(2) << "Unable to query history. No history service available."; | |
298 return false; | |
299 } | |
300 }; // namespace safe_browsing | |
OLD | NEW |