Index: chrome/browser/safe_browsing/browser_feature_extractor.cc |
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3d56279c3327f87a9755d0f309d058e1ec429e88 |
--- /dev/null |
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc |
@@ -0,0 +1,304 @@ |
+// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "chrome/browser/safe_browsing/browser_feature_extractor.h" |
+ |
+#include <map> |
+#include <utility> |
+ |
+#include "base/stl_util-inl.h" |
+#include "base/task.h" |
+#include "base/time.h" |
+#include "chrome/common/safe_browsing/csd.pb.h" |
+#include "chrome/browser/history/history.h" |
+#include "chrome/browser/history/history_types.h" |
+#include "chrome/browser/profiles/profile.h" |
+#include "content/common/page_transition_types.h" |
+#include "content/browser/browser_thread.h" |
+#include "content/browser/cancelable_request.h" |
+#include "content/browser/tab_contents/tab_contents.h" |
+#include "googleurl/src/gurl.h" |
+ |
+namespace safe_browsing { |
+namespace features { |
+const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount"; |
+const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount"; |
+const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount"; |
+const char kUrlHistoryVisitCountMoreThan24hAgo[] = |
+ "UrlHistoryVisitCountMoreThan24hAgo"; |
+const char kHttpHostVisitCount[] = "HttpHostVisitCount"; |
+const char kHttpsHostVisitCount[] = "HttpsHostVisitCount"; |
+const char kFirstHttpHostVisitMoreThan24hAgo[] = |
+ "FirstHttpHostVisitMoreThan24hAgo"; |
+const char kFirstHttpsHostVisitMoreThan24hAgo[] = |
+ "FirstHttpsHostVisitMoreThan24hAgo"; |
+} // namespace features |
+ |
+static void AddFeature(const std::string& feature_name, |
+ double feature_value, |
+ ClientPhishingRequest* request) { |
+ DCHECK(request); |
+ ClientPhishingRequest::Feature* feature = |
+ request->add_non_model_feature_map(); |
+ feature->set_name(feature_name); |
+ feature->set_value(feature_value); |
+ VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); |
+} |
+ |
+BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab) |
+ : tab_(tab), |
+ ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { |
+ DCHECK(tab); |
+} |
+ |
+BrowserFeatureExtractor::~BrowserFeatureExtractor() { |
+ method_factory_.RevokeAll(); |
+ // Delete all the pending extractions (delete callback and request objects). |
+ STLDeleteContainerPairPointers(pending_extractions_.begin(), |
+ pending_extractions_.end()); |
+ // Also cancel all the pending history service queries. |
+ HistoryService* history; |
+ bool success = GetHistoryService(&history); |
+ DCHECK(success || pending_queries_.size() == 0); |
+ // Cancel all the pending history lookups and cleanup the memory. |
+ for (PendingQueriesMap::iterator it = pending_queries_.begin(); |
+ it != pending_queries_.end(); ++it) { |
+ if (history) { |
+ history->CancelRequest(it->first); |
+ } |
+ ExtractionData& extraction = it->second; |
+ delete extraction.first; // delete request |
+ delete extraction.second; // delete callback |
+ } |
+ pending_queries_.clear(); |
+} |
+ |
+void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request, |
+ DoneCallback* callback) { |
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
+ DCHECK(request); |
+ DCHECK_EQ(0U, request->url().find("http:")); |
+ DCHECK(callback); |
+ if (!callback) { |
+ DLOG(ERROR) << "ExtractFeatures called without a callback object"; |
+ return; |
+ } |
+ pending_extractions_.insert(std::make_pair(request, callback)); |
+ MessageLoop::current()->PostTask( |
+ FROM_HERE, |
+ method_factory_.NewRunnableMethod( |
+ &BrowserFeatureExtractor::StartExtractFeatures, |
+ request, callback)); |
+} |
+ |
+void BrowserFeatureExtractor::StartExtractFeatures( |
+ ClientPhishingRequest* request, |
+ DoneCallback* callback) { |
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
+ ExtractionData extraction = std::make_pair(request, callback); |
+ size_t removed = pending_extractions_.erase(extraction); |
+ DCHECK_EQ(1U, removed); |
+ HistoryService* history; |
+ if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { |
+ callback->Run(false, request); |
+ delete callback; |
+ return; |
+ } |
+ CancelableRequestProvider::Handle handle = history->QueryURL( |
+ GURL(request->url()), |
+ true /* wants_visits */, |
+ &request_consumer_, |
+ NewCallback(this, |
+ &BrowserFeatureExtractor::QueryUrlHistoryDone)); |
+ |
+ StorePendingQuery(handle, request, callback); |
+} |
+ |
+void BrowserFeatureExtractor::QueryUrlHistoryDone( |
+ CancelableRequestProvider::Handle handle, |
+ bool success, |
+ const history::URLRow* row, |
+ history::VisitVector* visits) { |
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
+ ClientPhishingRequest* request; |
+ DoneCallback* callback; |
+ if (!GetPendingQuery(handle, &request, &callback)) { |
+ DLOG(FATAL) << "No pending history query found"; |
+ return; |
+ } |
+ DCHECK(request); |
+ DCHECK(callback); |
+ if (!success) { |
+ // URL is not found in the history. In practice this should not |
+ // happen (unless there is a real error) because we just visited |
+ // that URL. |
+ callback->Run(false, request); |
+ delete callback; |
+ return; |
+ } |
+ AddFeature(features::kUrlHistoryVisitCount, |
+ static_cast<double>(row->visit_count()), |
+ request); |
+ |
+ base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); |
+ int num_visits_24h_ago = 0; |
+ int num_visits_typed = 0; |
+ int num_visits_link = 0; |
+ for (history::VisitVector::const_iterator it = visits->begin(); |
+ it != visits->end(); ++it) { |
+ if (!PageTransition::IsMainFrame(it->transition)) { |
+ continue; |
+ } |
+ if (it->visit_time < threshold) { |
+ ++num_visits_24h_ago; |
+ } |
+ PageTransition::Type transition = PageTransition::StripQualifier( |
+ it->transition); |
+ if (transition == PageTransition::TYPED) { |
+ ++num_visits_typed; |
+ } else if (transition == PageTransition::LINK) { |
+ ++num_visits_link; |
+ } |
+ } |
+ AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo, |
+ static_cast<double>(num_visits_24h_ago), |
+ request); |
+ AddFeature(features::kUrlHistoryTypedCount, |
+ static_cast<double>(num_visits_typed), |
+ request); |
+ AddFeature(features::kUrlHistoryLinkCount, |
+ static_cast<double>(num_visits_link), |
+ request); |
+ |
+ // Issue next history lookup for host visits. |
+ HistoryService* history; |
+ if (!GetHistoryService(&history)) { |
+ callback->Run(false, request); |
+ delete callback; |
+ return; |
+ } |
+ CancelableRequestProvider::Handle next_handle = |
+ history->GetVisibleVisitCountToHost( |
+ GURL(request->url()), |
+ &request_consumer_, |
+ NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone)); |
+ StorePendingQuery(next_handle, request, callback); |
+} |
+ |
+void BrowserFeatureExtractor::QueryHttpHostVisitsDone( |
+ CancelableRequestProvider::Handle handle, |
+ bool success, |
+ int num_visits, |
+ base::Time first_visit) { |
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
+ ClientPhishingRequest* request; |
+ DoneCallback* callback; |
+ if (!GetPendingQuery(handle, &request, &callback)) { |
+ DLOG(FATAL) << "No pending history query found"; |
+ return; |
+ } |
+ DCHECK(request); |
+ DCHECK(callback); |
+ if (!success) { |
+ callback->Run(false, request); |
+ delete callback; |
+ return; |
+ } |
+ SetHostVisitsFeatures(num_visits, first_visit, true, request); |
+ |
+ // Same lookup but for the HTTPS URL. |
+ HistoryService* history; |
+ if (!GetHistoryService(&history)) { |
+ callback->Run(false, request); |
+ delete callback; |
+ return; |
+ } |
+ std::string https_url = request->url(); |
+ CancelableRequestProvider::Handle next_handle = |
+ history->GetVisibleVisitCountToHost( |
+ GURL(https_url.replace(0, 5, "https:")), |
+ &request_consumer_, |
+ NewCallback(this, |
+ &BrowserFeatureExtractor::QueryHttpsHostVisitsDone)); |
+ StorePendingQuery(next_handle, request, callback); |
+} |
+ |
+void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( |
+ CancelableRequestProvider::Handle handle, |
+ bool success, |
+ int num_visits, |
+ base::Time first_visit) { |
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
+ ClientPhishingRequest* request; |
+ DoneCallback* callback; |
+ if (!GetPendingQuery(handle, &request, &callback)) { |
+ DLOG(FATAL) << "No pending history query found"; |
+ return; |
+ } |
+ DCHECK(request); |
+ DCHECK(callback); |
+ if (!success) { |
+ callback->Run(false, request); |
+ delete callback; |
+ return; |
+ } |
+ SetHostVisitsFeatures(num_visits, first_visit, false, request); |
+ callback->Run(true, request); // We're done with all the history lookups. |
+ delete callback; |
+} |
+ |
+void BrowserFeatureExtractor::SetHostVisitsFeatures( |
+ int num_visits, |
+ base::Time first_visit, |
+ bool is_http_query, |
+ ClientPhishingRequest* request) { |
+ DCHECK(request); |
+ AddFeature(is_http_query ? |
+ features::kHttpHostVisitCount : features::kHttpsHostVisitCount, |
+ static_cast<double>(num_visits), |
+ request); |
+ AddFeature( |
+ is_http_query ? |
+ features::kFirstHttpHostVisitMoreThan24hAgo : |
+ features::kFirstHttpsHostVisitMoreThan24hAgo, |
+ (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ? |
+ 1.0 : 0.0, |
+ request); |
+} |
+ |
+void BrowserFeatureExtractor::StorePendingQuery( |
+ CancelableRequestProvider::Handle handle, |
+ ClientPhishingRequest* request, |
+ DoneCallback* callback) { |
+ DCHECK_EQ(0U, pending_queries_.count(handle)); |
+ pending_queries_[handle] = std::make_pair(request, callback); |
+} |
+ |
+bool BrowserFeatureExtractor::GetPendingQuery( |
+ CancelableRequestProvider::Handle handle, |
+ ClientPhishingRequest** request, |
+ DoneCallback** callback) { |
+ PendingQueriesMap::iterator it = pending_queries_.find(handle); |
+ DCHECK(it != pending_queries_.end()); |
+ if (it != pending_queries_.end()) { |
+ *request = it->second.first; |
+ *callback = it->second.second; |
+ pending_queries_.erase(it); |
+ return true; |
+ } |
+ return false; |
+} |
+ |
+bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { |
+ *history = NULL; |
+ if (tab_ && tab_->profile()) { |
+ *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS); |
+ if (*history) { |
+ return true; |
+ } |
+ } |
+ VLOG(2) << "Unable to query history. No history service available."; |
+ return false; |
+} |
+}; // namespace safe_browsing |