| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // BrowserFeatureExtractor computes various browser features for client-side | 5 // BrowserFeatureExtractor computes various browser features for client-side |
| 6 // phishing detection. For now it does a bunch of lookups in the history | 6 // phishing detection. For now it does a bunch of lookups in the history |
| 7 // service to see whether a particular URL has been visited before by the | 7 // service to see whether a particular URL has been visited before by the |
| 8 // user. | 8 // user. |
| 9 | 9 |
| 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| 12 #pragma once | 12 #pragma once |
| 13 | 13 |
| 14 #include <map> | 14 #include <map> |
| 15 #include <set> | 15 #include <set> |
| 16 #include <string> |
| 16 #include <utility> | 17 #include <utility> |
| 17 | 18 |
| 18 #include "base/basictypes.h" | 19 #include "base/basictypes.h" |
| 19 #include "base/callback_old.h" | 20 #include "base/callback_old.h" |
| 20 #include "base/memory/scoped_ptr.h" | 21 #include "base/memory/scoped_ptr.h" |
| 21 #include "base/task.h" | 22 #include "base/task.h" |
| 22 #include "base/time.h" | 23 #include "base/time.h" |
| 23 #include "chrome/browser/history/history_types.h" | 24 #include "chrome/browser/history/history_types.h" |
| 24 #include "chrome/browser/safe_browsing/safe_browsing_service.h" | 25 #include "chrome/browser/safe_browsing/safe_browsing_service.h" |
| 25 #include "content/browser/cancelable_request.h" | 26 #include "content/browser/cancelable_request.h" |
| (...skipping 11 matching lines...) Expand all Loading... |
| 37 std::set<std::string> ips; | 38 std::set<std::string> ips; |
| 38 | 39 |
| 39 // If a SafeBrowsing interstitial was shown for the current URL | 40 // If a SafeBrowsing interstitial was shown for the current URL |
| 40 // this will contain the UnsafeResource struct for that URL. | 41 // this will contain the UnsafeResource struct for that URL. |
| 41 scoped_ptr<SafeBrowsingService::UnsafeResource> unsafe_resource; | 42 scoped_ptr<SafeBrowsingService::UnsafeResource> unsafe_resource; |
| 42 | 43 |
| 43 BrowseInfo(); | 44 BrowseInfo(); |
| 44 ~BrowseInfo(); | 45 ~BrowseInfo(); |
| 45 }; | 46 }; |
| 46 | 47 |
| 47 namespace features { | |
| 48 | |
| 49 // TODO(noelutz): move renderer/safe_browsing/features.h to common. | |
| 50 //////////////////////////////////////////////////// | |
| 51 // History features. | |
| 52 //////////////////////////////////////////////////// | |
| 53 | |
| 54 // Number of visits to that URL stored in the browser history. | |
| 55 // Should always be an integer larger than 1 because by the time | |
| 56 // we lookup the history the current URL should already be stored there. | |
| 57 extern const char kUrlHistoryVisitCount[]; | |
| 58 | |
| 59 // Number of times the URL was typed in the Omnibox. | |
| 60 extern const char kUrlHistoryTypedCount[]; | |
| 61 | |
| 62 // Number of times the URL was reached by clicking a link. | |
| 63 extern const char kUrlHistoryLinkCount[]; | |
| 64 | |
| 65 // Number of times URL was visited more than 24h ago. | |
| 66 extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; | |
| 67 | |
| 68 // Number of user-visible visits to all URLs on the same host/port as | |
| 69 // the URL for HTTP and HTTPS. | |
| 70 extern const char kHttpHostVisitCount[]; | |
| 71 extern const char kHttpsHostVisitCount[]; | |
| 72 | |
| 73 // Boolean feature which is true if the host was visited for the first | |
| 74 // time more than 24h ago (only considers user-visible visits like above). | |
| 75 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; | |
| 76 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; | |
| 77 | |
| 78 //////////////////////////////////////////////////// | |
| 79 // Browse features. | |
| 80 //////////////////////////////////////////////////// | |
| 81 // Note that these features may have the following prefixes prepended to them | |
| 82 // to indicate which page type the feature pertains to. | |
| 83 extern const char kHostPrefix[]; | |
| 84 extern const char kRedirectPrefix[]; | |
| 85 | |
| 86 // Referrer | |
| 87 extern const char kReferrer[]; | |
| 88 // True if the referrer was stripped because it is an SSL referrer. | |
| 89 extern const char kHasSSLReferrer[]; | |
| 90 // Stores the page transition. See: PageTransition. We strip the qualifier. | |
| 91 extern const char kPageTransitionType[]; | |
| 92 // True if this navigation is the first for this tab. | |
| 93 extern const char kIsFirstNavigation[]; | |
| 94 | |
| 95 // Resource was fetched from a known bad IP address. | |
| 96 extern const char kBadIpFetch[]; | |
| 97 | |
| 98 // SafeBrowsing related features. Fields from the UnsafeResource if there is | |
| 99 // any. | |
| 100 extern const char kSafeBrowsingMaliciousUrl[]; | |
| 101 extern const char kSafeBrowsingOriginalUrl[]; | |
| 102 extern const char kSafeBrowsingIsSubresource[]; | |
| 103 extern const char kSafeBrowsingThreatType[]; | |
| 104 } // namespace features | |
| 105 | |
| 106 // All methods of this class must be called on the UI thread (including | 48 // All methods of this class must be called on the UI thread (including |
| 107 // the constructor). | 49 // the constructor). |
| 108 class BrowserFeatureExtractor { | 50 class BrowserFeatureExtractor { |
| 109 public: | 51 public: |
| 110 // Called when feature extraction is done. The first argument will be | 52 // Called when feature extraction is done. The first argument will be |
| 111 // true iff feature extraction succeeded. The second argument is the | 53 // true iff feature extraction succeeded. The second argument is the |
| 112 // phishing request which was modified by the feature extractor. The | 54 // phishing request which was modified by the feature extractor. The |
| 113 // DoneCallback takes ownership of the request object. | 55 // DoneCallback takes ownership of the request object. |
| 114 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; | 56 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
| 115 | 57 |
| 116 // The caller keeps ownership of the tab and service objects and is | 58 // The caller keeps ownership of the tab and service objects and is |
| 117 // responsible for ensuring that they stay valid for the entire | 59 // responsible for ensuring that they stay valid for the entire |
| 118 // lifetime of this object. | 60 // lifetime of this object. |
| 119 BrowserFeatureExtractor(TabContents* tab, | 61 BrowserFeatureExtractor(TabContents* tab, |
| 120 ClientSideDetectionService* service); | 62 ClientSideDetectionService* service); |
| 121 | 63 |
| 122 // The destructor will cancel any pending requests. | 64 // The destructor will cancel any pending requests. |
| 123 virtual ~BrowserFeatureExtractor(); | 65 virtual ~BrowserFeatureExtractor(); |
| 124 | 66 |
| 125 // Begins extraction of the browser features. We take ownership | 67 // Begins extraction of the browser features. We take ownership |
| 126 // of the request object until |callback| is called (see DoneCallback above) | 68 // of the request object until |callback| is called (see DoneCallback above) |
| 127 // and will write the extracted features to the feature map. Once the | 69 // and will write the extracted features to the feature map. Once the |
| 128 // feature extraction is complete, |callback| is run on the UI thread. We | 70 // feature extraction is complete, |callback| is run on the UI thread. We |
| 129 // take ownership of the |callback| object. |info| may not be valid after | 71 // take ownership of the |callback| object. |info| may not be valid after |
| 130 // ExtractFeatures returns. This method must run on the UI thread. | 72 // ExtractFeatures returns. This method must run on the UI thread. |
| 131 virtual void ExtractFeatures(const BrowseInfo* info, | 73 virtual void ExtractFeatures(const BrowseInfo* info, |
| 132 ClientPhishingRequest* request, | 74 ClientPhishingRequest* request, |
| 133 DoneCallback* callback); | 75 DoneCallback* callback); |
| 134 | 76 |
| 77 // The size of hash prefix to use for |
| 78 // ClientPhishingRequest.suffix_prefix_hash. Public for testing. |
| 79 static const int kSuffixPrefixHashLength; |
| 80 |
| 135 private: | 81 private: |
| 136 friend class DeleteTask<BrowserFeatureExtractor>; | 82 friend class DeleteTask<BrowserFeatureExtractor>; |
| 137 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; | 83 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
| 138 typedef std::map<CancelableRequestProvider::Handle, | 84 typedef std::map<CancelableRequestProvider::Handle, |
| 139 ExtractionData> PendingQueriesMap; | 85 ExtractionData> PendingQueriesMap; |
| 140 | 86 |
| 141 // Synchronous browser feature extraction. | 87 // Synchronous browser feature extraction. |
| 142 void ExtractBrowseInfoFeatures(const BrowseInfo& info, | 88 void ExtractBrowseInfoFeatures(const BrowseInfo& info, |
| 143 ClientPhishingRequest* request); | 89 ClientPhishingRequest* request); |
| 144 | 90 |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 186 // is a pending query for the given handle it will return true and set both | 132 // is a pending query for the given handle it will return true and set both |
| 187 // the request and cb pointers. Otherwise, it will return false. | 133 // the request and cb pointers. Otherwise, it will return false. |
| 188 bool GetPendingQuery(CancelableRequestProvider::Handle handle, | 134 bool GetPendingQuery(CancelableRequestProvider::Handle handle, |
| 189 ClientPhishingRequest** request, | 135 ClientPhishingRequest** request, |
| 190 DoneCallback** callback); | 136 DoneCallback** callback); |
| 191 | 137 |
| 192 // Helper function which gets the history service if possible. If the pointer | 138 // Helper function which gets the history service if possible. If the pointer |
| 193 // is set it will return true and false otherwise. | 139 // is set it will return true and false otherwise. |
| 194 bool GetHistoryService(HistoryService** history); | 140 bool GetHistoryService(HistoryService** history); |
| 195 | 141 |
| 142 // Computes the SHA-256 hash prefix for the URL and adds it to the |
| 143 // ClientPhishingRequest. |
| 144 void ComputeURLHash(ClientPhishingRequest* request); |
| 145 |
| 196 TabContents* tab_; | 146 TabContents* tab_; |
| 197 ClientSideDetectionService* service_; | 147 ClientSideDetectionService* service_; |
| 198 CancelableRequestConsumer request_consumer_; | 148 CancelableRequestConsumer request_consumer_; |
| 199 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; | 149 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; |
| 200 | 150 |
| 201 // Set of pending extractions (i.e. extractions for which ExtractFeatures was | 151 // Set of pending extractions (i.e. extractions for which ExtractFeatures was |
| 202 // called but not StartExtractFeatures). | 152 // called but not StartExtractFeatures). |
| 203 std::set<ExtractionData> pending_extractions_; | 153 std::set<ExtractionData> pending_extractions_; |
| 204 | 154 |
| 205 // Set of pending queries (i.e., where history->Query...() was called but | 155 // Set of pending queries (i.e., where history->Query...() was called but |
| 206 // the history callback hasn't been invoked yet). | 156 // the history callback hasn't been invoked yet). |
| 207 PendingQueriesMap pending_queries_; | 157 PendingQueriesMap pending_queries_; |
| 208 | 158 |
| 209 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); | 159 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
| 210 }; | 160 }; |
| 211 } // namespace safe_browsing | 161 } // namespace safe_browsing |
| 212 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 162 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| OLD | NEW |