OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // BrowserFeatureExtractor computes various browser features for client-side | 5 // BrowserFeatureExtractor computes various browser features for client-side |
6 // phishing detection. For now it does a bunch of lookups in the history | 6 // phishing detection. For now it does a bunch of lookups in the history |
7 // service to see whether a particular URL has been visited before by the | 7 // service to see whether a particular URL has been visited before by the |
8 // user. | 8 // user. |
9 | 9 |
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
12 #pragma once | 12 #pragma once |
13 | 13 |
14 #include <map> | 14 #include <map> |
15 #include <set> | 15 #include <set> |
16 #include <utility> | 16 #include <utility> |
17 | 17 |
18 #include "base/basictypes.h" | 18 #include "base/basictypes.h" |
19 #include "base/callback_old.h" | 19 #include "base/callback_old.h" |
20 #include "base/task.h" | 20 #include "base/task.h" |
21 #include "base/time.h" | 21 #include "base/time.h" |
22 #include "chrome/browser/history/history_types.h" | 22 #include "chrome/browser/history/history_types.h" |
23 #include "content/browser/cancelable_request.h" | 23 #include "content/browser/cancelable_request.h" |
| 24 #include "content/common/page_transition_types.h" |
| 25 #include "googleurl/src/gurl.h" |
24 | 26 |
25 class HistoryService; | 27 class HistoryService; |
26 class TabContents; | 28 class TabContents; |
27 | 29 |
28 namespace safe_browsing { | 30 namespace safe_browsing { |
29 class ClientPhishingRequest; | 31 class ClientPhishingRequest; |
30 | 32 |
| 33 struct BrowseInfo {  // Browsing context for a URL; passed to BrowserFeatureExtractor::ExtractFeatures().
| 34 // The URL that is being classified. This is redundant information but
| 35 // we keep it around to verify that the URL that comes back from the
| 36 // renderer is unchanged.
| 37 GURL url;
| 38
| 39 // The referrer URL. Presumably the referrer recorded for the navigation to |url|; verify against the caller.
| 40 GURL referrer;
| 41
| 42 // How did we get to the URL? Holds the page transition type of the navigation.
| 43 PageTransition::Type transition;
| 44 };
| 45 |
31 namespace features { | 46 namespace features { |
32 | 47 |
33 // TODO(noelutz): move renderer/safe_browsing/features.h to common. | 48 // TODO(noelutz): move renderer/safe_browsing/features.h to common. |
34 //////////////////////////////////////////////////// | 49 //////////////////////////////////////////////////// |
35 // History features. | 50 // History features. |
36 //////////////////////////////////////////////////// | 51 //////////////////////////////////////////////////// |
37 | 52 |
38 // Number of visits to that URL stored in the browser history. | 53 // Number of visits to that URL stored in the browser history. |
39 // Should always be an integer of at least 1 because by the time | 54 // Should always be an integer of at least 1 because by the time |
40 // we look up the history the current URL should already be stored there. | 55 // we look up the history the current URL should already be stored there. |
(...skipping 10 matching lines...)
51 | 66 |
52 // Number of user-visible visits to all URLs on the same host/port as | 67 // Number of user-visible visits to all URLs on the same host/port as |
53 // the URL for HTTP and HTTPS. | 68 // the URL for HTTP and HTTPS. |
54 extern const char kHttpHostVisitCount[]; | 69 extern const char kHttpHostVisitCount[]; |
55 extern const char kHttpsHostVisitCount[]; | 70 extern const char kHttpsHostVisitCount[]; |
56 | 71 |
57 // Boolean feature which is true if the host was visited for the first | 72 // Boolean feature which is true if the host was visited for the first |
58 // time more than 24h ago (only considers user-visible visits like above). | 73 // time more than 24h ago (only considers user-visible visits like above). |
59 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; | 74 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; |
60 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; | 75 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; |
| 76 |
| 77 //////////////////////////////////////////////////// |
| 78 // Browse features. |
| 79 //////////////////////////////////////////////////// |
| 80 // True if the referrer was stripped because it is an SSL referrer. |
| 81 extern const char kHasSSLReferrer[]; |
| 82 // Stores the page transition type (see PageTransition) with the qualifier stripped.
| 83 extern const char kPageTransitionType[]; |
61 } // namespace features | 84 } // namespace features |
62 | 85 |
63 // All methods of this class must be called on the UI thread (including | 86 // All methods of this class must be called on the UI thread (including |
64 // the constructor). | 87 // the constructor). |
65 class BrowserFeatureExtractor { | 88 class BrowserFeatureExtractor { |
66 public: | 89 public: |
67 // Called when feature extraction is done. The first argument will be | 90 // Called when feature extraction is done. The first argument will be |
68 // true iff feature extraction succeeded. The second argument is the | 91 // true iff feature extraction succeeded. The second argument is the |
69 // phishing request which was modified by the feature extractor. The | 92 // phishing request which was modified by the feature extractor. The |
70 // DoneCallback takes ownership of the request object. | 93 // DoneCallback takes ownership of the request object. |
71 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; | 94 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; |
72 | 95 |
73 // The caller keeps ownership of the tab object and is responsible for | 96 // The caller keeps ownership of the tab object and is responsible for |
74 // ensuring that it stays valid for the entire lifetime of this object. | 97 // ensuring that it stays valid for the entire lifetime of this object. |
75 explicit BrowserFeatureExtractor(TabContents* tab); | 98 explicit BrowserFeatureExtractor(TabContents* tab); |
76 | 99 |
77 // The destructor will cancel any pending requests. | 100 // The destructor will cancel any pending requests. |
78 virtual ~BrowserFeatureExtractor(); | 101 virtual ~BrowserFeatureExtractor(); |
79 | 102 |
80 // Begins extraction of the browser features. We take ownership | 103 // Begins extraction of the browser features. We take ownership |
81 // of the request object until |callback| is called (see DoneCallback above) | 104 // of the request object until |callback| is called (see DoneCallback above) |
82 // and will write the extracted features to the feature map. Once the | 105 // and will write the extracted features to the feature map. Once the |
83 // feature extraction is complete, |callback| is run on the UI thread. We | 106 // feature extraction is complete, |callback| is run on the UI thread. We |
84 // take ownership of the |callback| object. This method must run on the UI | 107 // take ownership of the |callback| object. This method must run on the UI |
85 // thread. | 108 // thread. |
86 virtual void ExtractFeatures(ClientPhishingRequest* request, | 109 virtual void ExtractFeatures(const BrowseInfo& info, |
| 110 ClientPhishingRequest* request, |
87 DoneCallback* callback); | 111 DoneCallback* callback); |
88 | 112 |
89 private: | 113 private: |
90 friend class DeleteTask<BrowserFeatureExtractor>; | 114 friend class DeleteTask<BrowserFeatureExtractor>; |
91 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; | 115 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; |
92 typedef std::map<CancelableRequestProvider::Handle, | 116 typedef std::map<CancelableRequestProvider::Handle, |
93 ExtractionData> PendingQueriesMap; | 117 ExtractionData> PendingQueriesMap; |
94 | 118 |
95 // Actually starts feature extraction (does the real work). | 119 // Actually starts feature extraction (does the real work). |
96 void StartExtractFeatures(ClientPhishingRequest* request, | 120 void StartExtractFeatures(ClientPhishingRequest* request, |
(...skipping 55 matching lines...)
152 std::set<ExtractionData> pending_extractions_; | 176 std::set<ExtractionData> pending_extractions_; |
153 | 177 |
154 // Set of pending queries (i.e., where history->Query...() was called but | 178 // Set of pending queries (i.e., where history->Query...() was called but |
155 // the history callback hasn't been invoked yet). | 179 // the history callback hasn't been invoked yet). |
156 PendingQueriesMap pending_queries_; | 180 PendingQueriesMap pending_queries_; |
157 | 181 |
158 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); | 182 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); |
159 }; | 183 }; |
160 } // namespace safe_browsing | 184 } // namespace safe_browsing |
161 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | 185 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ |
OLD | NEW |