| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // BrowserFeatureExtractor computes various browser features for client-side | 5 // BrowserFeatureExtractor computes various browser features for client-side |
| 6 // phishing detection. For now it does a bunch of lookups in the history | 6 // phishing detection. For now it does a bunch of lookups in the history |
| 7 // service to see whether a particular URL has been visited before by the | 7 // service to see whether a particular URL has been visited before by the |
| 8 // user. | 8 // user. |
| 9 | 9 |
| 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| 12 #pragma once | 12 #pragma once |
| 13 | 13 |
| 14 #include <map> | 14 #include <map> |
| 15 #include <set> | 15 #include <set> |
| 16 #include <utility> | 16 #include <utility> |
| 17 | 17 |
| 18 #include "base/basictypes.h" | 18 #include "base/basictypes.h" |
| 19 #include "base/callback_old.h" | 19 #include "base/callback_old.h" |
| 20 #include "base/task.h" | 20 #include "base/task.h" |
| 21 #include "base/time.h" | 21 #include "base/time.h" |
| 22 #include "chrome/browser/history/history_types.h" | 22 #include "chrome/browser/history/history_types.h" |
| 23 #include "content/browser/cancelable_request.h" | 23 #include "content/browser/cancelable_request.h" |
| 24 #include "content/common/page_transition_types.h" |
| 25 #include "googleurl/src/gurl.h" |
| 24 | 26 |
| 25 class HistoryService; | 27 class HistoryService; |
| 26 class TabContents; | 28 class TabContents; |
| 27 | 29 |
| 28 namespace safe_browsing { | 30 namespace safe_browsing { |
| 29 class ClientPhishingRequest; | 31 class ClientPhishingRequest; |
| 30 | 32 |
| 33 struct BrowseInfo { |
| 34 // The URL that is being classified. This is redundant information but |
| 35 // we keep it around to verify that the URL that comes back from the |
| 36 // renderer is unchanged. |
| 37 GURL url; |
| 38 |
| 39 // The referrer URL. |
| 40 GURL referrer; |
| 41 |
| 42 // The page transition type, i.e. how we got to the URL. |
| 43 PageTransition::Type transition; |
| 44 }; |
| 45 |
| 31 namespace features { | 46 namespace features { |
| 32 | 47 |
| 33 // TODO(noelutz): move renderer/safe_browsing/features.h to common. | 48 // TODO(noelutz): move renderer/safe_browsing/features.h to common. |
| 34 //////////////////////////////////////////////////// | 49 //////////////////////////////////////////////////// |
| 35 // History features. | 50 // History features. |
| 36 //////////////////////////////////////////////////// | 51 //////////////////////////////////////////////////// |
| 37 | 52 |
| 38 // Number of visits to that URL stored in the browser history. | 53 // Number of visits to that URL stored in the browser history. |
| 39 // Should always be an integer of at least 1 because by the time | 54 // Should always be an integer of at least 1 because by the time |
| 40 // we look up the history the current URL should already be stored there. | 55 // we look up the history the current URL should already be stored there. |
| (...skipping 10 matching lines...) Expand all Loading... |
| 51 | 66 |
| 52 // Number of user-visible visits to all URLs on the same host/port as | 67 // Number of user-visible visits to all URLs on the same host/port as |
| 53 // the URL for HTTP and HTTPS. | 68 // the URL for HTTP and HTTPS. |
| 54 extern const char kHttpHostVisitCount[]; | 69 extern const char kHttpHostVisitCount[]; |
| 55 extern const char kHttpsHostVisitCount[]; | 70 extern const char kHttpsHostVisitCount[]; |
| 56 | 71 |
| 57 // Boolean feature which is true if the host was visited for the first | 72 // Boolean feature which is true if the host was visited for the first |
| 58 // time more than 24h ago (only considers user-visible visits like above). | 73 // time more than 24h ago (only considers user-visible visits like above). |
| 59 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; | 74 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; |
| 60 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; | 75 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; |
| 76 |
| 77 //////////////////////////////////////////////////// |
| 78 // Browse features. |
| 79 //////////////////////////////////////////////////// |
| 80 // True if the referrer was stripped because it is an SSL referrer. |
| 81 extern const char kHasSSLReferrer[]; |
| 82 // Stores the page transition. See: PageTransition. We strip the qualifier. |
| 83 extern const char kPageTransitionType[]; |
| 61 } // namespace features | 84 } // namespace features |
| 62 | 85 |
| 63 // All methods of this class must be called on the UI thread (including | 86 // All methods of this class must be called on the UI thread (including |
| 64 // the constructor). | 87 // the constructor). |
| 65 class BrowserFeatureExtractor { | 88 class BrowserFeatureExtractor { |
| 66 public: | 89 public: |
| 67 // Called when feature extraction is done. The first argument will be | 90 // Called when feature extraction is done. The first argument will be |
| 68 // true iff feature extraction succeeded. The second argument is the | 91 // true iff feature extraction succeeded. The second argument is the |
| 69 // phishing request which was modified by the feature extractor. The | 92 // phishing request which was modified by the feature extractor. The |
| 70 // DoneCallback takes ownership of the request object. | 93 // DoneCallback takes ownership of the request object. |
| 71 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; | 94 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
| 72 | 95 |
| 73 // The caller keeps ownership of the tab object and is responsible for | 96 // The caller keeps ownership of the tab object and is responsible for |
| 74 // ensuring that it stays valid for the entire lifetime of this object. | 97 // ensuring that it stays valid for the entire lifetime of this object. |
| 75 explicit BrowserFeatureExtractor(TabContents* tab); | 98 explicit BrowserFeatureExtractor(TabContents* tab); |
| 76 | 99 |
| 77 // The destructor will cancel any pending requests. | 100 // The destructor will cancel any pending requests. |
| 78 virtual ~BrowserFeatureExtractor(); | 101 virtual ~BrowserFeatureExtractor(); |
| 79 | 102 |
| 80 // Begins extraction of the browser features. We take ownership | 103 // Begins extraction of the browser features. We take ownership |
| 81 // of the request object until |callback| is called (see DoneCallback above) | 104 // of the request object until |callback| is called (see DoneCallback above) |
| 82 // and will write the extracted features to the feature map. Once the | 105 // and will write the extracted features to the feature map. Once the |
| 83 // feature extraction is complete, |callback| is run on the UI thread. We | 106 // feature extraction is complete, |callback| is run on the UI thread. We |
| 84 // take ownership of the |callback| object. This method must run on the UI | 107 // take ownership of the |callback| object. This method must run on the UI |
| 85 // thread. | 108 // thread. |
| 86 virtual void ExtractFeatures(ClientPhishingRequest* request, | 109 virtual void ExtractFeatures(const BrowseInfo& info, |
| 110 ClientPhishingRequest* request, |
| 87 DoneCallback* callback); | 111 DoneCallback* callback); |
| 88 | 112 |
| 89 private: | 113 private: |
| 90 friend class DeleteTask<BrowserFeatureExtractor>; | 114 friend class DeleteTask<BrowserFeatureExtractor>; |
| 91 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; | 115 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
| 92 typedef std::map<CancelableRequestProvider::Handle, | 116 typedef std::map<CancelableRequestProvider::Handle, |
| 93 ExtractionData> PendingQueriesMap; | 117 ExtractionData> PendingQueriesMap; |
| 94 | 118 |
| 95 // Actually starts feature extraction (does the real work). | 119 // Actually starts feature extraction (does the real work). |
| 96 void StartExtractFeatures(ClientPhishingRequest* request, | 120 void StartExtractFeatures(ClientPhishingRequest* request, |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 152 std::set<ExtractionData> pending_extractions_; | 176 std::set<ExtractionData> pending_extractions_; |
| 153 | 177 |
| 154 // Set of pending queries (i.e., where history->Query...() was called but | 178 // Set of pending queries (i.e., where history->Query...() was called but |
| 155 // the history callback hasn't been invoked yet). | 179 // the history callback hasn't been invoked yet). |
| 156 PendingQueriesMap pending_queries_; | 180 PendingQueriesMap pending_queries_; |
| 157 | 181 |
| 158 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); | 182 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
| 159 }; | 183 }; |
| 160 } // namespace safe_browsing | 184 } // namespace safe_browsing |
| 161 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 185 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
| OLD | NEW |