Index: chrome/browser/safe_browsing/browser_feature_extractor.h |
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..802afb5f3cac55af17c6302261300bbb117092dd |
--- /dev/null |
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.h |
@@ -0,0 +1,161 @@ |
+// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+// |
+// BrowserFeatureExtractor computes various browser features for client-side |
+// phishing detection. For now it does a bunch of lookups in the history |
+// service to see whether a particular URL has been visited before by the |
+// user. |
+ |
+#ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
+#define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
+#pragma once |
+ |
+#include <map> |
+#include <set> |
+#include <utility> |
+ |
+#include "base/basictypes.h" |
+#include "base/callback_old.h" |
+#include "base/task.h" |
+#include "base/time.h" |
+#include "chrome/browser/history/history_types.h" |
+#include "content/browser/cancelable_request.h" |
+ |
+class HistoryService; |
+class TabContents; |
+ |
+namespace safe_browsing { |
+class ClientPhishingRequest; |
+ |
+namespace features { |
+ |
+// TODO(noelutz): move renderer/safe_browsing/features.h to common. |
+//////////////////////////////////////////////////// |
+// History features. |
+//////////////////////////////////////////////////// |
+ |
+// Number of visits to that URL stored in the browser history. |
+// Should always be an integer larger than 1 because by the time |
+// we lookup the history the current URL should already be stored there. |
+extern const char kUrlHistoryVisitCount[]; |
+ |
+// Number of times the URL was typed in the Omnibox. |
+extern const char kUrlHistoryTypedCount[]; |
+ |
+// Number of times the URL was reached by clicking a link. |
+extern const char kUrlHistoryLinkCount[]; |
+ |
+// Number of times URL was visited more than 24h ago. |
+extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; |
+ |
+// Number of user-visible visits to all URLs on the same host/port as |
+// the URL for HTTP and HTTPs. |
+extern const char kHttpHostVisitCount[]; |
+extern const char kHttpsHostVisitCount[]; |
+ |
+// Boolean feature which is true if the host was visited for the first |
+// time more than 24h ago (only considers user-visible visits like above). |
+extern const char kFirstHttpHostVisitMoreThan24hAgo[]; |
+extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; |
+} // namespace features |
+ |
+// All methods of this class must be called on the UI thread (including |
+// the constructor). |
+class BrowserFeatureExtractor { |
+ public: |
+ // Called when feature extraction is done. The first argument will be |
+ // true iff feature extraction succeeded. The second argument is the |
+ // phishing request which was modified by the feature extractor. The |
+ // DoneCallback takes ownership of the request object. |
+ typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
+ |
+ // The caller keeps ownership of the tab object and is responsible for |
+ // ensuring that it stays valid for the entire lifetime of this object. |
+ explicit BrowserFeatureExtractor(TabContents* tab); |
+ |
+ // The destructor will cancel any pending requests. |
+ virtual ~BrowserFeatureExtractor(); |
+ |
+ // Begins extraction of the browser features. We take ownership |
+ // of the request object until |callback| is called (see DoneCallback above) |
+ // and will write the extracted features to the feature map. Once the |
+ // feature extraction is complete, |callback| is run on the UI thread. We |
+ // take ownership of the |callback| object. This method must run on the UI |
+ // thread. |
+ virtual void ExtractFeatures(ClientPhishingRequest* request, |
+ DoneCallback* callback); |
+ |
+ private: |
+ friend class DeleteTask<BrowserFeatureExtractor>; |
+ typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
+ typedef std::map<CancelableRequestProvider::Handle, |
+ ExtractionData> PendingQueriesMap; |
+ |
+ // Actually starts feature extraction (does the real work). |
+ void StartExtractFeatures(ClientPhishingRequest* request, |
+ DoneCallback* callback); |
+ |
+ // HistoryService callback which is called when we're done querying URL visits |
+ // in the history. |
+ void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle, |
+ bool success, |
+ const history::URLRow* row, |
+ history::VisitVector* visits); |
+ |
+ // HistoryService callback which is called when we're done querying HTTP host |
+ // visits in the history. |
+ void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle, |
+ bool success, |
+ int num_visits, |
+ base::Time first_visit); |
+ |
+ // HistoryService callback which is called when we're done querying HTTPS host |
+ // visits in the history. |
+ void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle, |
+ bool success, |
+ int num_visits, |
+ base::Time first_visit); |
+ |
+ // Helper function which sets the host history features given the |
+ // number of host visits and the time of the fist host visit. Set |
+ // |is_http_query| to true if the URL scheme is HTTP and to false if |
+ // the scheme is HTTPS. |
+ void SetHostVisitsFeatures(int num_visits, |
+ base::Time first_visit, |
+ bool is_http_query, |
+ ClientPhishingRequest* request); |
+ |
+ // Helper function which stores the request and callback while the history |
+ // query is being processed. |
+ void StorePendingQuery(CancelableRequestProvider::Handle handle, |
+ ClientPhishingRequest* request, |
+ DoneCallback* callback); |
+ |
+ // Helper function which is the counterpart of StorePendingQuery. If there |
+ // is a pending query for the given handle it will return false and set both |
+ // the request and cb pointers. Otherwise, it will return false. |
+ bool GetPendingQuery(CancelableRequestProvider::Handle handle, |
+ ClientPhishingRequest** request, |
+ DoneCallback** callback); |
+ |
+ // Helper function which gets the history server if possible. If the pointer |
+ // is set it will return true and false otherwise. |
+ bool GetHistoryService(HistoryService** history); |
+ |
+ TabContents* tab_; |
+ CancelableRequestConsumer request_consumer_; |
+ ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; |
+ |
+ // Set of pending extractions (i.e. extractions for which ExtractFeatures was |
+ // called but not StartExtractFeatures). |
+ std::set<ExtractionData> pending_extractions_; |
+ |
+ // Set of pending queries (i.e., where history->Query...() was called but |
+ // the history callback hasn't been invoked yet). |
+ PendingQueriesMap pending_queries_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
+}; |
+} // namespace safe_browsing |
+#endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |