OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 // |
| 5 // BrowserFeatureExtractor computes various browser features for client-side |
| 6 // phishing detection. For now it does a bunch of lookups in the history |
| 7 // service to see whether a particular URL has been visited before by the |
| 8 // user. |
| 9 |
| 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| 12 #pragma once |
| 13 |
| 14 #include <map> |
| 15 #include <set> |
| 16 #include <utility> |
| 17 |
| 18 #include "base/basictypes.h" |
| 19 #include "base/callback_old.h" |
| 20 #include "base/task.h" |
| 21 #include "base/time.h" |
| 22 #include "chrome/browser/history/history_types.h" |
| 23 #include "content/browser/cancelable_request.h" |
| 24 |
| 25 class HistoryService; |
| 26 class TabContents; |
| 27 |
| 28 namespace safe_browsing { |
| 29 class ClientPhishingRequest; |
| 30 |
| 31 namespace features { |
| 32 |
| 33 // TODO(noelutz): move renderer/safe_browsing/features.h to common. |
| 34 //////////////////////////////////////////////////// |
| 35 // History features. |
| 36 //////////////////////////////////////////////////// |
| 37 |
| 38 // Number of visits to that URL stored in the browser history. |
| 39 // Should always be an integer of at least 1 because by the time |
| 40 // we look up the history the current URL should already be stored there. |
| 41 extern const char kUrlHistoryVisitCount[]; |
| 42 |
| 43 // Number of times the URL was typed in the Omnibox. |
| 44 extern const char kUrlHistoryTypedCount[]; |
| 45 |
| 46 // Number of times the URL was reached by clicking a link. |
| 47 extern const char kUrlHistoryLinkCount[]; |
| 48 |
| 49 // Number of times the URL was visited more than 24h ago. |
| 50 extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; |
| 51 |
| 52 // Number of user-visible visits to all URLs on the same host/port as |
| 53 // the URL for HTTP and HTTPS. |
| 54 extern const char kHttpHostVisitCount[]; |
| 55 extern const char kHttpsHostVisitCount[]; |
| 56 |
| 57 // Boolean feature which is true if the host was visited for the first |
| 58 // time more than 24h ago (only considers user-visible visits like above). |
| 59 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; |
| 60 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; |
| 61 } // namespace features |
| 62 |
| 63 // All methods of this class must be called on the UI thread (including |
| 64 // the constructor). |
| 65 class BrowserFeatureExtractor { |
| 66 public: |
| 67 // Called when feature extraction is done. The first argument will be |
| 68 // true iff feature extraction succeeded. The second argument is the |
| 69 // phishing request which was modified by the feature extractor. The |
| 70 // DoneCallback takes ownership of the request object. |
| 71 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
| 72 |
| 73 // The caller keeps ownership of the tab object and is responsible for |
| 74 // ensuring that it stays valid for the entire lifetime of this object. |
| 75 explicit BrowserFeatureExtractor(TabContents* tab); |
| 76 |
| 77 // The destructor will cancel any pending requests. |
| 78 virtual ~BrowserFeatureExtractor(); |
| 79 |
| 80 // Begins extraction of the browser features. We take ownership |
| 81 // of the request object until |callback| is called (see DoneCallback above) |
| 82 // and will write the extracted features to the feature map. Once the |
| 83 // feature extraction is complete, |callback| is run on the UI thread. We |
| 84 // take ownership of the |callback| object. This method must run on the UI |
| 85 // thread. |
| 86 virtual void ExtractFeatures(ClientPhishingRequest* request, |
| 87 DoneCallback* callback); |
| 88 |
| 89 private: |
| 90 friend class DeleteTask<BrowserFeatureExtractor>; |
| 91 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
| 92 typedef std::map<CancelableRequestProvider::Handle, |
| 93 ExtractionData> PendingQueriesMap; |
| 94 |
| 95 // Actually starts feature extraction (does the real work). |
| 96 void StartExtractFeatures(ClientPhishingRequest* request, |
| 97 DoneCallback* callback); |
| 98 |
| 99 // HistoryService callback which is called when we're done querying URL visits |
| 100 // in the history. |
| 101 void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle, |
| 102 bool success, |
| 103 const history::URLRow* row, |
| 104 history::VisitVector* visits); |
| 105 |
| 106 // HistoryService callback which is called when we're done querying HTTP host |
| 107 // visits in the history. |
| 108 void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle, |
| 109 bool success, |
| 110 int num_visits, |
| 111 base::Time first_visit); |
| 112 |
| 113 // HistoryService callback which is called when we're done querying HTTPS host |
| 114 // visits in the history. |
| 115 void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle, |
| 116 bool success, |
| 117 int num_visits, |
| 118 base::Time first_visit); |
| 119 |
| 120 // Helper function which sets the host history features given the |
| 121 // number of host visits and the time of the first host visit. Set |
| 122 // |is_http_query| to true if the URL scheme is HTTP and to false if |
| 123 // the scheme is HTTPS. |
| 124 void SetHostVisitsFeatures(int num_visits, |
| 125 base::Time first_visit, |
| 126 bool is_http_query, |
| 127 ClientPhishingRequest* request); |
| 128 |
| 129 // Helper function which stores the request and callback while the history |
| 130 // query is being processed. |
| 131 void StorePendingQuery(CancelableRequestProvider::Handle handle, |
| 132 ClientPhishingRequest* request, |
| 133 DoneCallback* callback); |
| 134 |
| 135 // Helper function which is the counterpart of StorePendingQuery. If there |
| 136 // is a pending query for the given handle it will return true and set both |
| 137 // the request and callback pointers. Otherwise, it will return false. |
| 138 bool GetPendingQuery(CancelableRequestProvider::Handle handle, |
| 139 ClientPhishingRequest** request, |
| 140 DoneCallback** callback); |
| 141 |
| 142 // Helper function which gets the history service if possible. If the pointer |
| 143 // is set it will return true and false otherwise. |
| 144 bool GetHistoryService(HistoryService** history); |
| 145 |
| 146 TabContents* tab_; |
| 147 CancelableRequestConsumer request_consumer_; |
| 148 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; |
| 149 |
| 150 // Set of pending extractions (i.e. extractions for which ExtractFeatures was |
| 151 // called but not StartExtractFeatures). |
| 152 std::set<ExtractionData> pending_extractions_; |
| 153 |
| 154 // Set of pending queries (i.e., where history->Query...() was called but |
| 155 // the history callback hasn't been invoked yet). |
| 156 PendingQueriesMap pending_queries_; |
| 157 |
| 158 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
| 159 }; |
| 160 } // namespace safe_browsing |
| 161 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
OLD | NEW |