OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // BrowserFeatureExtractor computes various browser features for client-side | 5 // BrowserFeatureExtractor computes various browser features for client-side |
6 // phishing detection. For now it does a bunch of lookups in the history | 6 // phishing detection. For now it does a bunch of lookups in the history |
7 // service to see whether a particular URL has been visited before by the | 7 // service to see whether a particular URL has been visited before by the |
8 // user. | 8 // user. |
9 | 9 |
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
12 #pragma once | 12 #pragma once |
13 | 13 |
14 #include <map> | 14 #include <map> |
15 #include <set> | 15 #include <set> |
16 #include <string> | |
17 #include <utility> | 16 #include <utility> |
18 | 17 |
19 #include "base/basictypes.h" | 18 #include "base/basictypes.h" |
20 #include "base/callback_old.h" | 19 #include "base/callback_old.h" |
21 #include "base/memory/scoped_ptr.h" | 20 #include "base/memory/scoped_ptr.h" |
22 #include "base/task.h" | 21 #include "base/task.h" |
23 #include "base/time.h" | 22 #include "base/time.h" |
24 #include "chrome/browser/history/history_types.h" | 23 #include "chrome/browser/history/history_types.h" |
25 #include "chrome/browser/safe_browsing/safe_browsing_service.h" | 24 #include "chrome/browser/safe_browsing/safe_browsing_service.h" |
26 #include "content/browser/cancelable_request.h" | 25 #include "content/browser/cancelable_request.h" |
(...skipping 11 matching lines...) Expand all Loading... |
38 std::set<std::string> ips; | 37 std::set<std::string> ips; |
39 | 38 |
40 // If a SafeBrowsing interstitial was shown for the current URL | 39 // If a SafeBrowsing interstitial was shown for the current URL |
41 // this will contain the UnsafeResource struct for that URL. | 40 // this will contain the UnsafeResource struct for that URL. |
42 scoped_ptr<SafeBrowsingService::UnsafeResource> unsafe_resource; | 41 scoped_ptr<SafeBrowsingService::UnsafeResource> unsafe_resource; |
43 | 42 |
44 BrowseInfo(); | 43 BrowseInfo(); |
45 ~BrowseInfo(); | 44 ~BrowseInfo(); |
46 }; | 45 }; |
47 | 46 |
| 47 namespace features { |
| 48 |
| 49 // TODO(noelutz): move renderer/safe_browsing/features.h to common. |
| 50 //////////////////////////////////////////////////// |
| 51 // History features. |
| 52 //////////////////////////////////////////////////// |
| 53 |
| 54 // Number of visits to that URL stored in the browser history. |
| 55 // Should always be an integer of at least 1 because by the time |
| 56 // we look up the history the current URL should already be stored there. |
| 57 extern const char kUrlHistoryVisitCount[]; |
| 58 |
| 59 // Number of times the URL was typed in the Omnibox. |
| 60 extern const char kUrlHistoryTypedCount[]; |
| 61 |
| 62 // Number of times the URL was reached by clicking a link. |
| 63 extern const char kUrlHistoryLinkCount[]; |
| 64 |
| 65 // Number of times the URL was visited more than 24h ago. |
| 66 extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; |
| 67 |
| 68 // Number of user-visible visits to all URLs on the same host/port as |
| 69 // the URL for HTTP and HTTPS. |
| 70 extern const char kHttpHostVisitCount[]; |
| 71 extern const char kHttpsHostVisitCount[]; |
| 72 |
| 73 // Boolean feature which is true if the host was visited for the first |
| 74 // time more than 24h ago (only considers user-visible visits like above). |
| 75 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; |
| 76 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; |
| 77 |
| 78 //////////////////////////////////////////////////// |
| 79 // Browse features. |
| 80 //////////////////////////////////////////////////// |
| 81 // Note that these features may be prefixed with one of the following |
| 82 // strings, which tells which page type the feature pertains to. |
| 83 extern const char kHostPrefix[]; |
| 84 extern const char kRedirectPrefix[]; |
| 85 |
| 86 // Referrer |
| 87 extern const char kReferrer[]; |
| 88 // True if the referrer was stripped because it is an SSL referrer. |
| 89 extern const char kHasSSLReferrer[]; |
| 90 // Stores the page transition. See: PageTransition. We strip the qualifier. |
| 91 extern const char kPageTransitionType[]; |
| 92 // True if this navigation is the first for this tab. |
| 93 extern const char kIsFirstNavigation[]; |
| 94 |
| 95 // Resource was fetched from a known bad IP address. |
| 96 extern const char kBadIpFetch[]; |
| 97 |
| 98 // SafeBrowsing related features. Fields from the UnsafeResource if there is |
| 99 // one. |
| 100 extern const char kSafeBrowsingMaliciousUrl[]; |
| 101 extern const char kSafeBrowsingOriginalUrl[]; |
| 102 extern const char kSafeBrowsingIsSubresource[]; |
| 103 extern const char kSafeBrowsingThreatType[]; |
| 104 } // namespace features |
| 105 |
48 // All methods of this class must be called on the UI thread (including | 106 // All methods of this class must be called on the UI thread (including |
49 // the constructor). | 107 // the constructor). |
50 class BrowserFeatureExtractor { | 108 class BrowserFeatureExtractor { |
51 public: | 109 public: |
52 // Called when feature extraction is done. The first argument will be | 110 // Called when feature extraction is done. The first argument will be |
53 // true iff feature extraction succeeded. The second argument is the | 111 // true iff feature extraction succeeded. The second argument is the |
54 // phishing request which was modified by the feature extractor. The | 112 // phishing request which was modified by the feature extractor. The |
55 // DoneCallback takes ownership of the request object. | 113 // DoneCallback takes ownership of the request object. |
56 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; | 114 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
57 | 115 |
58 // The caller keeps ownership of the tab and service objects and is | 116 // The caller keeps ownership of the tab and service objects and is |
59 // responsible for ensuring that they stay valid for the entire | 117 // responsible for ensuring that they stay valid for the entire |
60 // lifetime of this object. | 118 // lifetime of this object. |
61 BrowserFeatureExtractor(TabContents* tab, | 119 BrowserFeatureExtractor(TabContents* tab, |
62 ClientSideDetectionService* service); | 120 ClientSideDetectionService* service); |
63 | 121 |
64 // The destructor will cancel any pending requests. | 122 // The destructor will cancel any pending requests. |
65 virtual ~BrowserFeatureExtractor(); | 123 virtual ~BrowserFeatureExtractor(); |
66 | 124 |
67 // Begins extraction of the browser features. We take ownership | 125 // Begins extraction of the browser features. We take ownership |
68 // of the request object until |callback| is called (see DoneCallback above) | 126 // of the request object until |callback| is called (see DoneCallback above) |
69 // and will write the extracted features to the feature map. Once the | 127 // and will write the extracted features to the feature map. Once the |
70 // feature extraction is complete, |callback| is run on the UI thread. We | 128 // feature extraction is complete, |callback| is run on the UI thread. We |
71 // take ownership of the |callback| object. |info| may not be valid after | 129 // take ownership of the |callback| object. |info| may not be valid after |
72 // ExtractFeatures returns. This method must run on the UI thread. | 130 // ExtractFeatures returns. This method must run on the UI thread. |
73 virtual void ExtractFeatures(const BrowseInfo* info, | 131 virtual void ExtractFeatures(const BrowseInfo* info, |
74 ClientPhishingRequest* request, | 132 ClientPhishingRequest* request, |
75 DoneCallback* callback); | 133 DoneCallback* callback); |
76 | 134 |
77 // The size of hash prefix to use for | |
78 // ClientPhishingRequest.suffix_prefix_hash. Public for testing. | |
79 static const int kSuffixPrefixHashLength; | |
80 | |
81 private: | 135 private: |
82 friend class DeleteTask<BrowserFeatureExtractor>; | 136 friend class DeleteTask<BrowserFeatureExtractor>; |
83 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; | 137 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
84 typedef std::map<CancelableRequestProvider::Handle, | 138 typedef std::map<CancelableRequestProvider::Handle, |
85 ExtractionData> PendingQueriesMap; | 139 ExtractionData> PendingQueriesMap; |
86 | 140 |
87 // Synchronous browser feature extraction. | 141 // Synchronous browser feature extraction. |
88 void ExtractBrowseInfoFeatures(const BrowseInfo& info, | 142 void ExtractBrowseInfoFeatures(const BrowseInfo& info, |
89 ClientPhishingRequest* request); | 143 ClientPhishingRequest* request); |
90 | 144 |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
132 // is a pending query for the given handle it will return true and set both | 186 // is a pending query for the given handle it will return true and set both |
133 // the request and cb pointers. Otherwise, it will return false. | 187 // the request and cb pointers. Otherwise, it will return false. |
134 bool GetPendingQuery(CancelableRequestProvider::Handle handle, | 188 bool GetPendingQuery(CancelableRequestProvider::Handle handle, |
135 ClientPhishingRequest** request, | 189 ClientPhishingRequest** request, |
136 DoneCallback** callback); | 190 DoneCallback** callback); |
137 | 191 |
138 // Helper function which gets the history service if possible. If the pointer | 192 // Helper function which gets the history service if possible. If the pointer |
139 // is set it will return true and false otherwise. | 193 // is set it will return true and false otherwise. |
140 bool GetHistoryService(HistoryService** history); | 194 bool GetHistoryService(HistoryService** history); |
141 | 195 |
142 // Computes the SHA-256 hash prefix for the URL and adds it to the | |
143 // ClientPhishingRequest. | |
144 void ComputeURLHash(ClientPhishingRequest* request); | |
145 | |
146 TabContents* tab_; | 196 TabContents* tab_; |
147 ClientSideDetectionService* service_; | 197 ClientSideDetectionService* service_; |
148 CancelableRequestConsumer request_consumer_; | 198 CancelableRequestConsumer request_consumer_; |
149 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; | 199 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; |
150 | 200 |
151 // Set of pending extractions (i.e. extractions for which ExtractFeatures was | 201 // Set of pending extractions (i.e. extractions for which ExtractFeatures was |
152 // called but not StartExtractFeatures). | 202 // called but not StartExtractFeatures). |
153 std::set<ExtractionData> pending_extractions_; | 203 std::set<ExtractionData> pending_extractions_; |
154 | 204 |
155 // Set of pending queries (i.e., where history->Query...() was called but | 205 // Set of pending queries (i.e., where history->Query...() was called but |
156 // the history callback hasn't been invoked yet). | 206 // the history callback hasn't been invoked yet). |
157 PendingQueriesMap pending_queries_; | 207 PendingQueriesMap pending_queries_; |
158 | 208 |
159 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); | 209 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
160 }; | 210 }; |
161 } // namespace safe_browsing | 211 } // namespace safe_browsing |
162 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 212 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
OLD | NEW |