Chromium Code Reviews| Index: chrome/browser/safe_browsing/browser_feature_extractor.cc |
| diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..ca427ebf62c435307e2a531cce05581fb759ff75 |
| --- /dev/null |
| +++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc |
| @@ -0,0 +1,301 @@ |
| +// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "chrome/browser/safe_browsing/browser_feature_extractor.h" |
| + |
| +#include <map> |
| +#include <utility> |
| + |
| +#include "base/stl_util-inl.h" |
| +#include "base/task.h" |
| +#include "base/time.h" |
| +#include "chrome/common/safe_browsing/csd.pb.h" |
| +#include "chrome/browser/history/history.h" |
| +#include "chrome/browser/history/history_types.h" |
| +#include "chrome/browser/profiles/profile.h" |
| +#include "content/common/page_transition_types.h" |
| +#include "content/browser/browser_thread.h" |
| +#include "content/browser/cancelable_request.h" |
| +#include "content/browser/tab_contents/tab_contents.h" |
| +#include "googleurl/src/gurl.h" |
| + |
| +namespace safe_browsing { |
| +// Names of the browser-side features that BrowserFeatureExtractor adds to a |
| +// ClientPhishingRequest. |
| +namespace features { |
| +// Features derived from the history entries of the exact URL. |
| +const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount"; |
|
gcasto (DO NOT USE)
2011/06/09 21:39:55
It might be nice to be able to easily distinguish
noelutz
2011/06/09 22:52:09
Done.
|
| +const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount"; |
| +const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount"; |
| +const char kUrlHistoryVisitCountMoreThan24hAgo[] = |
| +    "UrlHistoryVisitCountMoreThan24hAgo"; |
| + |
| +// Features derived from the visits to the host, looked up once with the |
| +// HTTP URL and once with the HTTPS URL. |
| +const char kHttpHostVisitCount[] = "HttpHostVisitCount"; |
| +const char kHttpsHostVisitCount[] = "HttpsHostVisitCount"; |
| +const char kFirstHttpHostVisitMoreThan24hAgo[] = |
| +    "FirstHttpHostVisitMoreThan24hAgo"; |
| +const char kFirstHttpsHostVisitMoreThan24hAgo[] = |
| +    "FirstHttpsHostVisitMoreThan24hAgo"; |
| +}  // namespace features |
| + |
| +// Creates an extractor that uses |tab| to reach the history service. |
| +// |tab| must not be NULL. |
| +BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab) |
| +    : tab_(tab), |
| +      ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { |
| +  DCHECK(tab_); |
| +} |
| + |
| +// Revokes the posted tasks, cancels the outstanding history queries and |
| +// frees the request and callback objects of every extraction that has not |
| +// completed yet. |
| +BrowserFeatureExtractor::~BrowserFeatureExtractor() { |
| +  method_factory_.RevokeAll(); |
| +  // Delete all the pending extractions (delete callback and request objects). |
| +  STLDeleteContainerPairPointers(pending_extractions_.begin(), |
| +                                 pending_extractions_.end()); |
| +  // Also cancel all the pending history service queries.  The lookup is done |
| +  // outside of DCHECK() on purpose: DCHECK() conditions are not evaluated in |
| +  // release builds, which would leave |history| uninitialized below. |
| +  HistoryService* history = NULL; |
| +  const bool has_history = GetHistoryService(&history); |
| +  DCHECK(has_history || pending_queries_.empty()); |
| +  if (has_history) { |
| +    // Cancel all the pending history lookups. |
| +    for (PendingQueriesMap::iterator it = pending_queries_.begin(); |
| +         it != pending_queries_.end(); ++it) { |
| +      history->CancelRequest(it->first); |
| +    } |
| +  } |
| +  // Once we cancelled all the pending queries to the history service we also |
| +  // need to cleanup the request and callback objects. |
| +  for (PendingQueriesMap::iterator it = pending_queries_.begin(); |
|
gcasto (DO NOT USE)
2011/06/09 21:39:55
I think that it would be cleaner for this to be me
noelutz
2011/06/09 22:52:09
Done.
|
| +       it != pending_queries_.end(); ++it) { |
| +    ExtractionData& extraction = it->second; |
| +    delete extraction.first;   // delete request |
| +    delete extraction.second;  // delete callback |
| +  } |
| +} |
| + |
| +// Schedules the feature extraction for |request|.  The work itself starts in |
| +// StartExtractFeatures(), which is posted to the current message loop. |
| +// Must be called on the UI thread.  Does nothing (apart from logging in |
| +// debug builds) when |callback| is NULL. |
| +void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request, |
| +                                              DoneCallback* callback) { |
| +  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| +  DCHECK(request); |
| +  DCHECK(request->url().find("http:") == 0); |
| +  DCHECK(callback); |
| +  if (callback == NULL) { |
| +    DLOG(ERROR) << "ExtractFeatures called without a callback object"; |
| +    return; |
| +  } |
| +  // Track the pair until the posted task runs so that the destructor can |
| +  // free it if the extractor goes away first. |
| +  const ExtractionData pending_extraction = std::make_pair(request, callback); |
| +  pending_extractions_.insert(pending_extraction); |
| + |
| +  MessageLoop* const loop = MessageLoop::current(); |
| +  loop->PostTask( |
| +      FROM_HERE, |
| +      method_factory_.NewRunnableMethod( |
| +          &BrowserFeatureExtractor::StartExtractFeatures, |
| +          request, callback)); |
| +} |
| + |
| +// Runs from the task posted by ExtractFeatures() and issues the first |
| +// history query (the visits to the exact URL of |request|).  Runs |
| +// |callback| with false when |request| is NULL or not initialized, or when |
| +// no history service is available. |
| +void BrowserFeatureExtractor::StartExtractFeatures( |
| +    ClientPhishingRequest* request, |
| +    DoneCallback* callback) { |
| +  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| +  ExtractionData extraction = std::make_pair(request, callback); |
| +  // The erase must not be an argument of DCHECK_EQ(): DCHECK arguments are |
| +  // not evaluated in release builds, so the entry would stay in |
| +  // |pending_extractions_| and be deleted again by the destructor. |
| +  const size_t num_erased = pending_extractions_.erase(extraction); |
| +  DCHECK_EQ(1U, num_erased); |
| +  HistoryService* history = NULL; |
| +  if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { |
| +    callback->Run(false, request); |
| +    return; |
| +  } |
| +  CancelableRequestProvider::Handle handle = history->QueryURL( |
| +      GURL(request->url()), |
| +      true /* wants_visits */, |
| +      &request_consumer_, |
| +      NewCallback(this, |
| +                  &BrowserFeatureExtractor::QueryUrlHistoryDone)); |
| + |
| +  // Remember which extraction this query belongs to so that |
| +  // QueryUrlHistoryDone() can find it again from the handle. |
| +  StorePendingQuery(handle, request, callback); |
| +} |
| + |
| +namespace { |
| +// Appends a feature called |name| with value |value| to |request| and logs |
| +// it at verbosity level 2. |
| +void AddUrlHistoryFeature(const char* name, |
| +                          double value, |
| +                          ClientPhishingRequest* request) { |
| +  ClientPhishingRequest::Feature* feature = request->add_feature_map(); |
| +  feature->set_name(name); |
| +  feature->set_value(value); |
| +  VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); |
| +} |
| +}  // namespace |
| + |
| +// Called with the result of the QueryURL() lookup issued by |
| +// StartExtractFeatures().  Adds the URL-level history features to the |
| +// pending request identified by |handle| and then issues the host visit |
| +// lookup for the HTTP URL.  Runs the callback with false if |success| is |
| +// false or if no history service is available. |
| +void BrowserFeatureExtractor::QueryUrlHistoryDone( |
| +    CancelableRequestProvider::Handle handle, |
| +    bool success, |
| +    const history::URLRow* row, |
| +    history::VisitVector* visits) { |
| +  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| +  ClientPhishingRequest* request = NULL; |
| +  DoneCallback* callback = NULL; |
| +  if (!GetPendingQuery(handle, &request, &callback)) { |
| +    DLOG(FATAL) << "No pending history query found"; |
| +    return; |
| +  } |
| +  DCHECK(request); |
| +  DCHECK(callback); |
| +  if (!success) { |
| +    // URL is not found in the history.  In practice this should not |
| +    // happen (unless there is a real error) because we just visited |
| +    // that URL. |
|
gcasto (DO NOT USE)
2011/06/09 21:39:55
Just to make sure, is the store that the browser d
noelutz
2011/06/09 22:52:09
It looks like adding to the history service is asy
|
| +    callback->Run(false, request); |
| +    return; |
| +  } |
| +  AddUrlHistoryFeature(features::kUrlHistoryVisitCount, |
| +                       static_cast<double>(row->visit_count()), |
| +                       request); |
|
gcasto (DO NOT USE)
2011/06/09 21:39:55
Might be worth making these 4 lines a function, Ad
noelutz
2011/06/09 22:52:09
Done.
|
| + |
| +  // Only main frame visits are counted below. |
| +  base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); |
| +  int num_visits_24h_ago = 0; |
| +  int num_visits_typed = 0; |
| +  int num_visits_link = 0; |
| +  for (history::VisitVector::const_iterator it = visits->begin(); |
| +       it != visits->end(); ++it) { |
| +    if (!PageTransition::IsMainFrame(it->transition)) { |
| +      continue; |
| +    } |
| +    if (it->visit_time < threshold) { |
| +      ++num_visits_24h_ago; |
| +    } |
| +    PageTransition::Type transition = PageTransition::StripQualifier( |
| +        it->transition); |
| +    if (transition == PageTransition::TYPED) { |
| +      ++num_visits_typed; |
| +    } else if (transition == PageTransition::LINK) { |
| +      ++num_visits_link; |
| +    } |
| +  } |
| +  AddUrlHistoryFeature(features::kUrlHistoryVisitCountMoreThan24hAgo, |
| +                       static_cast<double>(num_visits_24h_ago), |
| +                       request); |
| +  AddUrlHistoryFeature(features::kUrlHistoryTypedCount, |
| +                       static_cast<double>(num_visits_typed), |
| +                       request); |
| +  AddUrlHistoryFeature(features::kUrlHistoryLinkCount, |
| +                       static_cast<double>(num_visits_link), |
| +                       request); |
| + |
| +  // Issue next history lookup for host visits. |
|
gcasto (DO NOT USE)
2011/06/09 21:39:55
hist -> host
noelutz
2011/06/09 22:52:09
Done.
|
| +  HistoryService* history = NULL; |
| +  if (!GetHistoryService(&history)) { |
| +    callback->Run(false, request); |
| +    return; |
| +  } |
| +  CancelableRequestProvider::Handle next_handle = |
| +      history->GetVisibleVisitCountToHost( |
| +          GURL(request->url()), |
| +          &request_consumer_, |
| +          NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone)); |
| +  StorePendingQuery(next_handle, request, callback); |
| +} |
| + |
| +// Called with the result of the host visit lookup for the HTTP URL.  Adds |
| +// the HTTP host features to the pending request identified by |handle| and |
| +// then issues the same lookup for the HTTPS variant of the URL.  Runs the |
| +// callback with false if |success| is false or if no history service is |
| +// available. |
| +void BrowserFeatureExtractor::QueryHttpHostVisitsDone( |
| +    CancelableRequestProvider::Handle handle, |
| +    bool success, |
| +    int num_visits, |
| +    base::Time first_visit) { |
| +  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| +  ClientPhishingRequest* request; |
| +  DoneCallback* callback; |
| +  const bool found_query = GetPendingQuery(handle, &request, &callback); |
| +  if (!found_query) { |
| +    DLOG(FATAL) << "No pending history query found"; |
| +    return; |
| +  } |
| +  DCHECK(request); |
| +  DCHECK(callback); |
| +  if (!success) { |
| +    callback->Run(false, request); |
| +    return; |
| +  } |
| +  SetHostVisitsFeatures(num_visits, first_visit, true, request); |
| + |
| +  // Same lookup but for the HTTPS URL. |
| +  HistoryService* history; |
| +  if (!GetHistoryService(&history)) { |
| +    callback->Run(false, request); |
| +    return; |
| +  } |
| +  // Swap the leading "http:" (5 characters) for "https:". |
| +  std::string https_url = request->url(); |
| +  https_url.replace(0, 5, "https:"); |
| +  const GURL https_gurl(https_url); |
| +  CancelableRequestProvider::Handle next_handle = |
| +      history->GetVisibleVisitCountToHost( |
| +          https_gurl, |
| +          &request_consumer_, |
| +          NewCallback(this, |
| +                      &BrowserFeatureExtractor::QueryHttpsHostVisitsDone)); |
| +  StorePendingQuery(next_handle, request, callback); |
| +} |
| + |
| +// Called with the result of the host visit lookup for the HTTPS URL, which |
| +// is the last history lookup of an extraction.  On success the HTTPS host |
| +// features are added to the request; in both cases the callback is run with |
| +// |success| and the request. |
| +void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( |
| +    CancelableRequestProvider::Handle handle, |
| +    bool success, |
| +    int num_visits, |
| +    base::Time first_visit) { |
| +  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| +  ClientPhishingRequest* request; |
| +  DoneCallback* callback; |
| +  const bool found_query = GetPendingQuery(handle, &request, &callback); |
| +  if (!found_query) { |
| +    DLOG(FATAL) << "No pending history query found"; |
| +    return; |
| +  } |
| +  DCHECK(request); |
| +  DCHECK(callback); |
| +  if (success) { |
| +    SetHostVisitsFeatures(num_visits, first_visit, false, request); |
| +  } |
| +  // We're done with all the history lookups. |
| +  callback->Run(success, request); |
| +} |
| + |
| +// Adds two features to |request|: the number of visits to the host and |
| +// whether |first_visit| is more than one day in the past (1.0) or not (0.0). |
| +// |is_http_query| selects the HTTP or the HTTPS feature names. |
| +void BrowserFeatureExtractor::SetHostVisitsFeatures( |
| +    int num_visits, |
| +    base::Time first_visit, |
| +    bool is_http_query, |
| +    ClientPhishingRequest* request) { |
| +  DCHECK(request); |
| +  const char* visit_count_name = features::kHttpsHostVisitCount; |
| +  const char* first_visit_name = |
| +      features::kFirstHttpsHostVisitMoreThan24hAgo; |
| +  if (is_http_query) { |
| +    visit_count_name = features::kHttpHostVisitCount; |
| +    first_visit_name = features::kFirstHttpHostVisitMoreThan24hAgo; |
| +  } |
| + |
| +  ClientPhishingRequest::Feature* visit_count = request->add_feature_map(); |
| +  visit_count->set_name(visit_count_name); |
| +  visit_count->set_value(static_cast<double>(num_visits)); |
| +  VLOG(2) << "Browser feature: " << visit_count->name() << " " |
| +          << visit_count->value(); |
| + |
| +  ClientPhishingRequest::Feature* old_visit = request->add_feature_map(); |
| +  old_visit->set_name(first_visit_name); |
| +  const base::Time one_day_ago = |
| +      base::Time::Now() - base::TimeDelta::FromDays(1); |
| +  old_visit->set_value(first_visit < one_day_ago ? 1.0 : 0.0); |
| +  VLOG(2) << "Browser feature: " << old_visit->name() << " " |
| +          << old_visit->value(); |
| +} |
| + |
| +// Associates |request| and |callback| with the history query |handle| so |
| +// that GetPendingQuery() can retrieve them when the query completes.  No |
| +// entry is expected to exist for |handle| yet. |
| +void BrowserFeatureExtractor::StorePendingQuery( |
| +    CancelableRequestProvider::Handle handle, |
| +    ClientPhishingRequest* request, |
| +    DoneCallback* callback) { |
| +  DCHECK(pending_queries_.find(handle) == pending_queries_.end()); |
| +  ExtractionData& slot = pending_queries_[handle]; |
| +  slot = std::make_pair(request, callback); |
| +} |
| + |
| +// Looks up the extraction stored for |handle|.  If there is one, writes its |
| +// request and callback to |*request| and |*callback|, removes the entry and |
| +// returns true.  Otherwise returns false and leaves the out-params untouched. |
| +bool BrowserFeatureExtractor::GetPendingQuery( |
| +    CancelableRequestProvider::Handle handle, |
| +    ClientPhishingRequest** request, |
| +    DoneCallback** callback) { |
| +  PendingQueriesMap::iterator found = pending_queries_.find(handle); |
| +  const bool is_pending = (found != pending_queries_.end()); |
| +  DCHECK(is_pending); |
| +  if (!is_pending) { |
| +    return false; |
| +  } |
| +  const ExtractionData extraction = found->second; |
| +  pending_queries_.erase(found); |
| +  *request = extraction.first; |
| +  *callback = extraction.second; |
| +  return true; |
| +} |
| +// Stores the history service of the tab's profile in |*history| and returns |
| +// true.  If the tab, its profile or the history service is missing, sets |
| +// |*history| to NULL and returns false. |
| +bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { |
| +  *history = NULL; |
| +  Profile* profile = tab_ ? tab_->profile() : NULL; |
| +  if (profile) { |
| +    *history = profile->GetHistoryService(Profile::EXPLICIT_ACCESS); |
| +  } |
| +  if (*history == NULL) { |
| +    VLOG(2) << "Unable to query history.  No history service available."; |
| +    return false; |
| +  } |
| +  return true; |
| +} |
| +}; // namespace safe_browsing |