Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2694)

Unified Diff: chrome/browser/safe_browsing/browser_feature_extractor.h

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Add new files Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | chrome/browser/safe_browsing/browser_feature_extractor.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/safe_browsing/browser_feature_extractor.h
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h
new file mode 100644
index 0000000000000000000000000000000000000000..802afb5f3cac55af17c6302261300bbb117092dd
--- /dev/null
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.h
@@ -0,0 +1,161 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// BrowserFeatureExtractor computes various browser features for client-side
+// phishing detection. For now it does a bunch of lookups in the history
+// service to see whether a particular URL has been visited before by the
+// user.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
+#define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
+#pragma once
+
+#include <map>
+#include <set>
+#include <utility>
+
+#include "base/basictypes.h"
+#include "base/callback_old.h"
+#include "base/task.h"
+#include "base/time.h"
+#include "chrome/browser/history/history_types.h"
+#include "content/browser/cancelable_request.h"
+
+class HistoryService;
+class TabContents;
+
+namespace safe_browsing {
+class ClientPhishingRequest;
+
+namespace features {
+
+// TODO(noelutz): move renderer/safe_browsing/features.h to common.
+////////////////////////////////////////////////////
+// History features.
+////////////////////////////////////////////////////
+
+// Number of visits to that URL stored in the browser history.
+// Should always be an integer of at least 1 because by the time
+// we look up the history the current URL should already be stored there.
+extern const char kUrlHistoryVisitCount[];
+
+// Number of times the URL was typed in the Omnibox.
+extern const char kUrlHistoryTypedCount[];
+
+// Number of times the URL was reached by clicking a link.
+extern const char kUrlHistoryLinkCount[];
+
+// Number of times the URL was visited more than 24h ago.
+extern const char kUrlHistoryVisitCountMoreThan24hAgo[];
+
+// Number of user-visible visits to all URLs on the same host/port as
+// the URL for HTTP and HTTPS.
+extern const char kHttpHostVisitCount[];
+extern const char kHttpsHostVisitCount[];
+
+// Boolean feature which is true if the host was visited for the first
+// time more than 24h ago (only considers user-visible visits like above).
+extern const char kFirstHttpHostVisitMoreThan24hAgo[];
+extern const char kFirstHttpsHostVisitMoreThan24hAgo[];
+} // namespace features
+
+// All methods of this class must be called on the UI thread (including
+// the constructor).
+class BrowserFeatureExtractor {
+ public:
+ // Called when feature extraction is done. The first argument will be
+ // true iff feature extraction succeeded. The second argument is the
+ // phishing request which was modified by the feature extractor. The
+ // DoneCallback takes ownership of the request object.
+ typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback;
+
+ // The caller keeps ownership of the tab object and is responsible for
+ // ensuring that it stays valid for the entire lifetime of this object.
+ explicit BrowserFeatureExtractor(TabContents* tab);
+
+ // The destructor will cancel any pending requests.
+ virtual ~BrowserFeatureExtractor();
+
+ // Begins extraction of the browser features. We take ownership
+ // of the request object until |callback| is called (see DoneCallback above)
+ // and will write the extracted features to the feature map. Once the
+ // feature extraction is complete, |callback| is run on the UI thread. We
+ // take ownership of the |callback| object. This method must run on the UI
+ // thread.
+ virtual void ExtractFeatures(ClientPhishingRequest* request,
+ DoneCallback* callback);
+
+ private:
+ friend class DeleteTask<BrowserFeatureExtractor>;
+ typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData;
+ typedef std::map<CancelableRequestProvider::Handle,
+ ExtractionData> PendingQueriesMap;
+
+ // Actually starts feature extraction (does the real work).
+ void StartExtractFeatures(ClientPhishingRequest* request,
+ DoneCallback* callback);
+
+ // HistoryService callback which is called when we're done querying URL visits
+ // in the history.
+ void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle,
+ bool success,
+ const history::URLRow* row,
+ history::VisitVector* visits);
+
+ // HistoryService callback which is called when we're done querying HTTP host
+ // visits in the history.
+ void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle,
+ bool success,
+ int num_visits,
+ base::Time first_visit);
+
+ // HistoryService callback which is called when we're done querying HTTPS host
+ // visits in the history.
+ void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle,
+ bool success,
+ int num_visits,
+ base::Time first_visit);
+
+ // Helper function which sets the host history features given the
+ // number of host visits and the time of the first host visit. Set
+ // |is_http_query| to true if the URL scheme is HTTP and to false if
+ // the scheme is HTTPS.
+ void SetHostVisitsFeatures(int num_visits,
+ base::Time first_visit,
+ bool is_http_query,
+ ClientPhishingRequest* request);
+
+ // Helper function which stores the request and callback while the history
+ // query is being processed.
+ void StorePendingQuery(CancelableRequestProvider::Handle handle,
+ ClientPhishingRequest* request,
+ DoneCallback* callback);
+
+ // Helper function which is the counterpart of StorePendingQuery. If there
+ // is a pending query for the given handle it will return true and set both
+ // the request and callback pointers. Otherwise, it will return false.
+ bool GetPendingQuery(CancelableRequestProvider::Handle handle,
+ ClientPhishingRequest** request,
+ DoneCallback** callback);
+
+ // Helper function which gets the history service if possible. Returns true
+ // if the |history| out-pointer was set and false otherwise.
+ bool GetHistoryService(HistoryService** history);
+
+ TabContents* tab_;
+ CancelableRequestConsumer request_consumer_;
+ ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_;
+
+ // Set of pending extractions (i.e. extractions for which ExtractFeatures was
+ // called but not StartExtractFeatures).
+ std::set<ExtractionData> pending_extractions_;
+
+ // Set of pending queries (i.e., where history->Query...() was called but
+ // the history callback hasn't been invoked yet).
+ PendingQueriesMap pending_queries_;
+
+ DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor);
+};
+} // namespace safe_browsing
+#endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
« no previous file with comments | « no previous file | chrome/browser/safe_browsing/browser_feature_extractor.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698