OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" |
| 6 |
| 7 #include <map> |
| 8 #include <utility> |
| 9 |
| 10 #include "base/stl_util-inl.h" |
| 11 #include "base/task.h" |
| 12 #include "base/time.h" |
| 13 #include "chrome/common/safe_browsing/csd.pb.h" |
| 14 #include "chrome/browser/history/history.h" |
| 15 #include "chrome/browser/history/history_types.h" |
| 16 #include "chrome/browser/profiles/profile.h" |
| 17 #include "content/common/page_transition_types.h" |
| 18 #include "content/browser/browser_thread.h" |
| 19 #include "content/browser/cancelable_request.h" |
| 20 #include "content/browser/tab_contents/tab_contents.h" |
| 21 #include "googleurl/src/gurl.h" |
| 22 |
| 23 namespace safe_browsing { |
namespace features {
// Names of the features that BrowserFeatureExtractor adds to the feature map
// of a ClientPhishingRequest.

// Features derived from the history entry of the request URL itself.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount=";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount=";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount=";
const char kUrlHistoryVisitCount24hAgo[] = "UrlHistoryVisitCount24hAgo=";

// Number of visible visits to the host, over HTTP and over HTTPS.
const char kHttpHostVisitCount[] = "HttpHostVisitCount=";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount=";

// Set to 1.0 when the first visit to the host happened more than one day ago
// and to 0.0 otherwise.
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
| 36 |
| 37 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab) |
| 38 : tab_(tab), |
| 39 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { |
| 40 DCHECK(tab); |
| 41 } |
| 42 |
| 43 BrowserFeatureExtractor::~BrowserFeatureExtractor() { |
| 44 method_factory_.RevokeAll(); |
| 45 // Delete all the pending extractions (delete callback and request objects). |
| 46 STLDeleteContainerPairPointers(pending_extractions_.begin(), |
| 47 pending_extractions_.end()); |
| 48 // Also cancel all the pending history service queries. |
| 49 HistoryService* history; |
| 50 DCHECK(GetHistoryService(&history) || pending_queries_.size() == 0); |
| 51 if (history) { |
| 52 // Cancel all the pending history lookups and cleanup the memory. |
| 53 for (PendingQueriesMap::iterator it = pending_queries_.begin(); |
| 54 it != pending_queries_.end(); ++it) { |
| 55 history->CancelRequest(it->first); |
| 56 } |
| 57 } |
| 58 // Once we cancelled all the pending queries to the history service we also |
| 59 // need to cleanup the request and callback objects. |
| 60 for (PendingQueriesMap::iterator it = pending_queries_.begin(); |
| 61 it != pending_queries_.end(); ++it) { |
| 62 ExtractionData& extraction = it->second; |
| 63 delete extraction.first; // delete request |
| 64 delete extraction.second; // delete callback |
| 65 } |
| 66 } |
| 67 |
| 68 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request, |
| 69 DoneCallback* callback) { |
| 70 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| 71 DCHECK(request); |
| 72 DCHECK(request->url().find("http:") == 0); |
| 73 DCHECK(callback); |
| 74 if (!callback) { |
| 75 DLOG(ERROR) << "ExtractFeatures called without a callback object"; |
| 76 return; |
| 77 } |
| 78 pending_extractions_.insert(std::make_pair(request, callback)); |
| 79 MessageLoop::current()->PostTask( |
| 80 FROM_HERE, |
| 81 method_factory_.NewRunnableMethod( |
| 82 &BrowserFeatureExtractor::StartExtractFeatures, |
| 83 request, callback)); |
| 84 } |
| 85 |
| 86 void BrowserFeatureExtractor::StartExtractFeatures( |
| 87 ClientPhishingRequest* request, |
| 88 DoneCallback* callback) { |
| 89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| 90 ExtractionData extraction = std::make_pair(request, callback); |
| 91 DCHECK(1 == pending_extractions_.erase(extraction)); |
| 92 HistoryService* history; |
| 93 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { |
| 94 callback->Run(false, request); |
| 95 return; |
| 96 } |
| 97 CancelableRequestProvider::Handle handle = history->QueryURL( |
| 98 GURL(request->url()), |
| 99 true /* wants_visits */, |
| 100 &request_consumer_, |
| 101 NewCallback(this, |
| 102 &BrowserFeatureExtractor::QueryUrlHistoryDone)); |
| 103 |
| 104 StorePendingQuery(handle, request, callback); |
| 105 } |
| 106 |
| 107 void BrowserFeatureExtractor::QueryUrlHistoryDone( |
| 108 CancelableRequestProvider::Handle handle, |
| 109 bool success, |
| 110 const history::URLRow* row, |
| 111 history::VisitVector* visits) { |
| 112 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| 113 ClientPhishingRequest* request; |
| 114 DoneCallback* callback; |
| 115 if (!GetPendingQuery(handle, &request, &callback)) { |
| 116 DLOG(FATAL) << "No pending history query found"; |
| 117 return; |
| 118 } |
| 119 DCHECK(request); |
| 120 DCHECK(callback); |
| 121 if (!success) { |
| 122 // URL is not found in the history. In practice this should not |
| 123 // happen (unless there is a real error) because we just visited |
| 124 // that URL. |
| 125 callback->Run(false, request); |
| 126 return; |
| 127 } |
| 128 ClientPhishingRequest::Feature* feature = request->add_feature_map(); |
| 129 feature->set_name(features::kUrlHistoryVisitCount); |
| 130 feature->set_value(static_cast<double>(row->visit_count())); |
| 131 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); |
| 132 |
| 133 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); |
| 134 int num_visits_24h_ago = 0; |
| 135 int num_visits_typed = 0; |
| 136 int num_visits_link = 0; |
| 137 for (history::VisitVector::const_iterator it = visits->begin(); |
| 138 it != visits->end(); ++it) { |
| 139 if (!PageTransition::IsMainFrame(it->transition)) { |
| 140 continue; |
| 141 } |
| 142 if (it->visit_time < threshold) { |
| 143 ++num_visits_24h_ago; |
| 144 } |
| 145 PageTransition::Type transition = PageTransition::StripQualifier( |
| 146 it->transition); |
| 147 if (transition == PageTransition::TYPED) { |
| 148 ++num_visits_typed; |
| 149 } else if (transition == PageTransition::LINK) { |
| 150 ++num_visits_link; |
| 151 } |
| 152 } |
| 153 feature = request->add_feature_map(); |
| 154 feature->set_name(features::kUrlHistoryVisitCount24hAgo); |
| 155 feature->set_value(static_cast<double>(num_visits_24h_ago)); |
| 156 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); |
| 157 |
| 158 feature = request->add_feature_map(); |
| 159 feature->set_name(features::kUrlHistoryTypedCount); |
| 160 feature->set_value(static_cast<double>(num_visits_typed)); |
| 161 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); |
| 162 |
| 163 feature = request->add_feature_map(); |
| 164 feature->set_name(features::kUrlHistoryLinkCount); |
| 165 feature->set_value(static_cast<double>(num_visits_link)); |
| 166 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); |
| 167 |
| 168 // Issue next history lookup for hist visits. |
| 169 HistoryService* history; |
| 170 if (!GetHistoryService(&history)) { |
| 171 callback->Run(false, request); |
| 172 return; |
| 173 } |
| 174 CancelableRequestProvider::Handle next_handle = |
| 175 history->GetVisibleVisitCountToHost( |
| 176 GURL(request->url()), |
| 177 &request_consumer_, |
| 178 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone)); |
| 179 StorePendingQuery(next_handle, request, callback); |
| 180 } |
| 181 |
| 182 void BrowserFeatureExtractor::QueryHttpHostVisitsDone( |
| 183 CancelableRequestProvider::Handle handle, |
| 184 bool success, |
| 185 int num_visits, |
| 186 base::Time first_visit) { |
| 187 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| 188 ClientPhishingRequest* request; |
| 189 DoneCallback* callback; |
| 190 if (!GetPendingQuery(handle, &request, &callback)) { |
| 191 DLOG(FATAL) << "No pending history query found"; |
| 192 return; |
| 193 } |
| 194 DCHECK(request); |
| 195 DCHECK(callback); |
| 196 if (!success) { |
| 197 callback->Run(false, request); |
| 198 return; |
| 199 } |
| 200 SetHostVisitsFeatures(num_visits, first_visit, true, request); |
| 201 |
| 202 // Same lookup but for the HTTPS URL. |
| 203 HistoryService* history; |
| 204 if (!GetHistoryService(&history)) { |
| 205 callback->Run(false, request); |
| 206 return; |
| 207 } |
| 208 std::string https_url = request->url(); |
| 209 CancelableRequestProvider::Handle next_handle = |
| 210 history->GetVisibleVisitCountToHost( |
| 211 GURL(https_url.replace(0, 5, "https:")), |
| 212 &request_consumer_, |
| 213 NewCallback(this, |
| 214 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone)); |
| 215 StorePendingQuery(next_handle, request, callback); |
| 216 } |
| 217 |
| 218 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( |
| 219 CancelableRequestProvider::Handle handle, |
| 220 bool success, |
| 221 int num_visits, |
| 222 base::Time first_visit) { |
| 223 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| 224 ClientPhishingRequest* request; |
| 225 DoneCallback* callback; |
| 226 if (!GetPendingQuery(handle, &request, &callback)) { |
| 227 DLOG(FATAL) << "No pending history query found"; |
| 228 return; |
| 229 } |
| 230 DCHECK(request); |
| 231 DCHECK(callback); |
| 232 if (!success) { |
| 233 callback->Run(false, request); |
| 234 return; |
| 235 } |
| 236 SetHostVisitsFeatures(num_visits, first_visit, false, request); |
| 237 callback->Run(true, request); // We're done with all the history lookups. |
| 238 } |
| 239 |
| 240 void BrowserFeatureExtractor::SetHostVisitsFeatures( |
| 241 int num_visits, |
| 242 base::Time first_visit, |
| 243 bool is_http_query, |
| 244 ClientPhishingRequest* request) { |
| 245 DCHECK(request); |
| 246 ClientPhishingRequest::Feature* feature = request->add_feature_map(); |
| 247 feature->set_name(is_http_query ? |
| 248 features::kHttpHostVisitCount : |
| 249 features::kHttpsHostVisitCount); |
| 250 feature->set_value(static_cast<double>(num_visits)); |
| 251 VLOG(2) << "Browser feature: " << feature->name() << " " |
| 252 << feature->value(); |
| 253 |
| 254 feature = request->add_feature_map(); |
| 255 feature->set_name(is_http_query ? |
| 256 features::kFirstHttpHostVisitMoreThan24hAgo : |
| 257 features::kFirstHttpsHostVisitMoreThan24hAgo); |
| 258 if (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) { |
| 259 feature->set_value(1.0); |
| 260 } else { |
| 261 feature->set_value(0.0); |
| 262 } |
| 263 VLOG(2) << "Browser feature: " << feature->name() << " " |
| 264 << feature->value(); |
| 265 } |
| 266 |
| 267 void BrowserFeatureExtractor::StorePendingQuery( |
| 268 CancelableRequestProvider::Handle handle, |
| 269 ClientPhishingRequest* request, |
| 270 DoneCallback* callback) { |
| 271 DCHECK(0 == pending_queries_.count(handle)); |
| 272 pending_queries_[handle] = std::make_pair(request, callback); |
| 273 } |
| 274 |
| 275 bool BrowserFeatureExtractor::GetPendingQuery( |
| 276 CancelableRequestProvider::Handle handle, |
| 277 ClientPhishingRequest** request, |
| 278 DoneCallback** callback) { |
| 279 PendingQueriesMap::iterator it = pending_queries_.find(handle); |
| 280 DCHECK(it != pending_queries_.end()); |
| 281 if (it != pending_queries_.end()) { |
| 282 *request = it->second.first; |
| 283 *callback = it->second.second; |
| 284 pending_queries_.erase(it); |
| 285 return true; |
| 286 } |
| 287 return false; |
| 288 } |
| 289 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { |
| 290 *history = NULL; |
| 291 if (tab_ && tab_->profile()) { |
| 292 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS); |
| 293 if (*history) { |
| 294 return true; |
| 295 } |
| 296 } |
| 297 VLOG(2) << "Unable to query history. No history service available."; |
| 298 return false; |
| 299 } |
| 300 }; // namespace safe_browsing |
OLD | NEW |