OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // BrowserFeatureExtractor computes various browser features for client-side | 5 // BrowserFeatureExtractor computes various browser features for client-side |
6 // phishing detection. For now it does a bunch of lookups in the history | 6 // phishing detection. For now it does a bunch of lookups in the history |
7 // service to see whether a particular URL has been visited before by the | 7 // service to see whether a particular URL has been visited before by the |
8 // user. | 8 // user. |
9 | 9 |
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
12 #pragma once | 12 #pragma once |
13 | 13 |
14 #include <map> | 14 #include <map> |
15 #include <set> | 15 #include <set> |
| 16 #include <string> |
16 #include <utility> | 17 #include <utility> |
17 | 18 |
18 #include "base/basictypes.h" | 19 #include "base/basictypes.h" |
19 #include "base/callback_old.h" | 20 #include "base/callback_old.h" |
20 #include "base/memory/scoped_ptr.h" | 21 #include "base/memory/scoped_ptr.h" |
21 #include "base/task.h" | 22 #include "base/task.h" |
22 #include "base/time.h" | 23 #include "base/time.h" |
23 #include "chrome/browser/history/history_types.h" | 24 #include "chrome/browser/history/history_types.h" |
24 #include "chrome/browser/safe_browsing/safe_browsing_service.h" | 25 #include "chrome/browser/safe_browsing/safe_browsing_service.h" |
25 #include "content/browser/cancelable_request.h" | 26 #include "content/browser/cancelable_request.h" |
(...skipping 11 matching lines...) Expand all Loading... |
37 std::set<std::string> ips; | 38 std::set<std::string> ips; |
38 | 39 |
39 // If a SafeBrowsing interstitial was shown for the current URL | 40 // If a SafeBrowsing interstitial was shown for the current URL |
40 // this will contain the UnsafeResource struct for that URL. | 41 // this will contain the UnsafeResource struct for that URL. |
41 scoped_ptr<SafeBrowsingService::UnsafeResource> unsafe_resource; | 42 scoped_ptr<SafeBrowsingService::UnsafeResource> unsafe_resource; |
42 | 43 |
43 BrowseInfo(); | 44 BrowseInfo(); |
44 ~BrowseInfo(); | 45 ~BrowseInfo(); |
45 }; | 46 }; |
46 | 47 |
47 namespace features { | |
48 | |
49 // TODO(noelutz): move renderer/safe_browsing/features.h to common. | |
50 //////////////////////////////////////////////////// | |
51 // History features. | |
52 //////////////////////////////////////////////////// | |
53 | |
54 // Number of visits to that URL stored in the browser history. | |
55 // Should always be an integer larger than 1 because by the time | |
56 // we lookup the history the current URL should already be stored there. | |
57 extern const char kUrlHistoryVisitCount[]; | |
58 | |
59 // Number of times the URL was typed in the Omnibox. | |
60 extern const char kUrlHistoryTypedCount[]; | |
61 | |
62 // Number of times the URL was reached by clicking a link. | |
63 extern const char kUrlHistoryLinkCount[]; | |
64 | |
65 // Number of times URL was visited more than 24h ago. | |
66 extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; | |
67 | |
68 // Number of user-visible visits to all URLs on the same host/port as | |
69 // the URL for HTTP and HTTPs. | |
70 extern const char kHttpHostVisitCount[]; | |
71 extern const char kHttpsHostVisitCount[]; | |
72 | |
73 // Boolean feature which is true if the host was visited for the first | |
74 // time more than 24h ago (only considers user-visible visits like above). | |
75 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; | |
76 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; | |
77 | |
78 //////////////////////////////////////////////////// | |
79 // Browse features. | |
80 //////////////////////////////////////////////////// | |
81 // Note that these features may have the following prefixes appended to them | |
82 // that tell for which page type the feature pertains. | |
83 extern const char kHostPrefix[]; | |
84 extern const char kRedirectPrefix[]; | |
85 | |
86 // Referrer | |
87 extern const char kReferrer[]; | |
88 // True if the referrer was stripped because it is an SSL referrer. | |
89 extern const char kHasSSLReferrer[]; | |
90 // Stores the page transition. See: PageTransition. We strip the qualifier. | |
91 extern const char kPageTransitionType[]; | |
92 // True if this navigation is the first for this tab. | |
93 extern const char kIsFirstNavigation[]; | |
94 | |
95 // Resource was fetched from a known bad IP address. | |
96 extern const char kBadIpFetch[]; | |
97 | |
98 // SafeBrowsing related features. Fields from the UnsafeResource if there is | |
99 // any. | |
100 extern const char kSafeBrowsingMaliciousUrl[]; | |
101 extern const char kSafeBrowsingOriginalUrl[]; | |
102 extern const char kSafeBrowsingIsSubresource[]; | |
103 extern const char kSafeBrowsingThreatType[]; | |
104 } // namespace features | |
105 | |
106 // All methods of this class must be called on the UI thread (including | 48 // All methods of this class must be called on the UI thread (including |
107 // the constructor). | 49 // the constructor). |
108 class BrowserFeatureExtractor { | 50 class BrowserFeatureExtractor { |
109 public: | 51 public: |
110 // Called when feature extraction is done. The first argument will be | 52 // Called when feature extraction is done. The first argument will be |
111 // true iff feature extraction succeeded. The second argument is the | 53 // true iff feature extraction succeeded. The second argument is the |
112 // phishing request which was modified by the feature extractor. The | 54 // phishing request which was modified by the feature extractor. The |
113 // DoneCallback takes ownership of the request object. | 55 // DoneCallback takes ownership of the request object. |
114 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; | 56 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
115 | 57 |
116 // The caller keeps ownership of the tab and service objects and is | 58 // The caller keeps ownership of the tab and service objects and is |
117 // responsible for ensuring that they stay valid for the entire | 59 // responsible for ensuring that they stay valid for the entire |
118 // lifetime of this object. | 60 // lifetime of this object. |
119 BrowserFeatureExtractor(TabContents* tab, | 61 BrowserFeatureExtractor(TabContents* tab, |
120 ClientSideDetectionService* service); | 62 ClientSideDetectionService* service); |
121 | 63 |
122 // The destructor will cancel any pending requests. | 64 // The destructor will cancel any pending requests. |
123 virtual ~BrowserFeatureExtractor(); | 65 virtual ~BrowserFeatureExtractor(); |
124 | 66 |
125 // Begins extraction of the browser features. We take ownership | 67 // Begins extraction of the browser features. We take ownership |
126 // of the request object until |callback| is called (see DoneCallback above) | 68 // of the request object until |callback| is called (see DoneCallback above) |
127 // and will write the extracted features to the feature map. Once the | 69 // and will write the extracted features to the feature map. Once the |
128 // feature extraction is complete, |callback| is run on the UI thread. We | 70 // feature extraction is complete, |callback| is run on the UI thread. We |
129 // take ownership of the |callback| object. |info| may not be valid after | 71 // take ownership of the |callback| object. |info| may not be valid after |
130 // ExtractFeatures returns. This method must run on the UI thread. | 72 // ExtractFeatures returns. This method must run on the UI thread. |
131 virtual void ExtractFeatures(const BrowseInfo* info, | 73 virtual void ExtractFeatures(const BrowseInfo* info, |
132 ClientPhishingRequest* request, | 74 ClientPhishingRequest* request, |
133 DoneCallback* callback); | 75 DoneCallback* callback); |
134 | 76 |
| 77 // The size of hash prefix to use for |
| 78 // ClientPhishingRequest.suffix_prefix_hash. Public for testing. |
| 79 static const int kSuffixPrefixHashLength; |
| 80 |
135 private: | 81 private: |
136 friend class DeleteTask<BrowserFeatureExtractor>; | 82 friend class DeleteTask<BrowserFeatureExtractor>; |
137 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; | 83 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
138 typedef std::map<CancelableRequestProvider::Handle, | 84 typedef std::map<CancelableRequestProvider::Handle, |
139 ExtractionData> PendingQueriesMap; | 85 ExtractionData> PendingQueriesMap; |
140 | 86 |
141 // Synchronous browser feature extraction. | 87 // Synchronous browser feature extraction. |
142 void ExtractBrowseInfoFeatures(const BrowseInfo& info, | 88 void ExtractBrowseInfoFeatures(const BrowseInfo& info, |
143 ClientPhishingRequest* request); | 89 ClientPhishingRequest* request); |
144 | 90 |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
186 // is a pending query for the given handle it will return true and set both | 132 // is a pending query for the given handle it will return true and set both |
187 // the request and cb pointers. Otherwise, it will return false. | 133 // the request and cb pointers. Otherwise, it will return false. |
188 bool GetPendingQuery(CancelableRequestProvider::Handle handle, | 134 bool GetPendingQuery(CancelableRequestProvider::Handle handle, |
189 ClientPhishingRequest** request, | 135 ClientPhishingRequest** request, |
190 DoneCallback** callback); | 136 DoneCallback** callback); |
191 | 137 |
192 // Helper function which gets the history service if possible. If the pointer | 138 // Helper function which gets the history service if possible. If the pointer |
193 // is set it will return true, and false otherwise. | 139 // is set it will return true, and false otherwise. |
194 bool GetHistoryService(HistoryService** history); | 140 bool GetHistoryService(HistoryService** history); |
195 | 141 |
| 142 // Computes the SHA-256 hash prefix for the URL and adds it to the |
| 143 // ClientPhishingRequest. |
| 144 void ComputeURLHash(ClientPhishingRequest* request); |
| 145 |
196 TabContents* tab_; | 146 TabContents* tab_; |
197 ClientSideDetectionService* service_; | 147 ClientSideDetectionService* service_; |
198 CancelableRequestConsumer request_consumer_; | 148 CancelableRequestConsumer request_consumer_; |
199 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; | 149 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; |
200 | 150 |
201 // Set of pending extractions (i.e. extractions for which ExtractFeatures was | 151 // Set of pending extractions (i.e. extractions for which ExtractFeatures was |
202 // called but not StartExtractFeatures). | 152 // called but not StartExtractFeatures). |
203 std::set<ExtractionData> pending_extractions_; | 153 std::set<ExtractionData> pending_extractions_; |
204 | 154 |
205 // Set of pending queries (i.e., where history->Query...() was called but | 155 // Set of pending queries (i.e., where history->Query...() was called but |
206 // the history callback hasn't been invoked yet). | 156 // the history callback hasn't been invoked yet). |
207 PendingQueriesMap pending_queries_; | 157 PendingQueriesMap pending_queries_; |
208 | 158 |
209 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); | 159 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
210 }; | 160 }; |
211 } // namespace safe_browsing | 161 } // namespace safe_browsing |
212 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 162 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
OLD | NEW |