OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // BrowserFeatureExtractor computes various browser features for client-side | |
6 // phishing detection. For now it does a bunch of lookups in the history | |
7 // service to see whether a particular URL has been visited before by the | |
8 // user. | |
9 | |
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | |
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | |
12 #pragma once | |
13 | |
14 #include <map> | |
15 #include <set> | |
16 #include <utility> | |
17 | |
18 #include "base/basictypes.h" | |
19 #include "base/callback_old.h" | |
20 #include "base/task.h" | |
21 #include "base/time.h" | |
22 #include "chrome/browser/history/history_types.h" | |
23 #include "content/browser/cancelable_request.h" | |
24 | |
25 class HistoryService; | |
26 class TabContents; | |
27 | |
28 namespace safe_browsing { | |
29 class ClientPhishingRequest; | |
30 | |
31 namespace features { | |
32 | |
33 // TODO(noelutz): move renderer/safe_browsing/features.h to common. | |
34 //////////////////////////////////////////////////// | |
35 // History features. | |
36 //////////////////////////////////////////////////// | |
37 | |
38 // Number of visits to the URL stored in the browser history. | |
39 // Should always be at least 1 because by the time we look up | |
40 // the history the current URL should already be stored there. | |
41 extern const char kUrlHistoryVisitCount[]; | |
42 | |
43 // Number of times the URL was typed in the Omnibox. | |
44 extern const char kUrlHistoryTypedCount[]; | |
45 | |
46 // Number of times the URL was reached by clicking a link. | |
47 extern const char kUrlHistoryLinkCount[]; | |
48 | |
49 // Number of times the URL was visited more than 24h ago. | |
50 extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; | |
51 | |
52 // Number of user-visible visits to all URLs on the same host/port as | |
53 // the URL, counted separately for HTTP and HTTPS. | |
54 extern const char kHttpHostVisitCount[]; | |
55 extern const char kHttpsHostVisitCount[]; | |
56 | |
57 // Boolean feature which is true if the host was visited for the first | |
58 // time more than 24h ago (only considers user-visible visits like above). | |
59 extern const char kFirstHttpHostVisitMoreThan24hAgo[]; | |
60 extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; | |
61 } // namespace features | |
62 | |
63 // All methods of this class must be called on the UI thread (including | |
64 // the constructor). | |
65 class BrowserFeatureExtractor { | |
66 public: | |
67 // Called when feature extraction is done. The first argument will be | |
68 // true iff feature extraction succeeded. The second argument is the | |
69 // phishing request which was modified by the feature extractor. The | |
70 // DoneCallback takes ownership of the request object. | |
71 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; | |
72 | |
73 // The caller keeps ownership of the tab object and is responsible for | |
74 // ensuring that it stays valid for the entire lifetime of this object. | |
75 explicit BrowserFeatureExtractor(TabContents* tab); | |
76 | |
77 // The destructor will cancel any pending requests. | |
78 ~BrowserFeatureExtractor(); | |
79 | |
80 // Begins extraction of the browser features. We take ownership | |
81 // of the request object until |callback| is called (see DoneCallback above) | |
82 // and will write the extracted features to the feature map. Once the | |
83 // feature extraction is complete, |callback| is run on the UI thread. We | |
84 // take ownership of the |callback| object. This method must run on the UI | |
85 // thread. | |
86 void ExtractFeatures(ClientPhishingRequest* request, | |
87 DoneCallback* callback); | |
88 | |
89 private: | |
90 friend class DeleteTask<BrowserFeatureExtractor>; | |
91 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; | |
92 typedef std::map<CancelableRequestProvider::Handle, | |
93 ExtractionData> PendingQueriesMap; | |
94 | |
95 // Actually starts feature extraction (does the real work). | |
96 void StartExtractFeatures(ClientPhishingRequest* request, | |
97 DoneCallback* callback); | |
98 | |
99 // HistoryService callback which is called when we're done querying URL visits | |
100 // in the history. | |
101 void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle, | |
102 bool success, | |
103 const history::URLRow* row, | |
104 history::VisitVector* visits); | |
105 | |
106 // HistoryService callback which is called when we're done querying HTTP host | |
107 // visits in the history. | |
108 void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle, | |
109 bool success, | |
110 int num_visits, | |
111 base::Time first_visit); | |
112 | |
113 // HistoryService callback which is called when we're done querying HTTPS host | |
114 // visits in the history. | |
115 void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle, | |
116 bool success, | |
117 int num_visits, | |
118 base::Time first_visit); | |
119 | |
120 // Helper function which sets the host history features given the | |
121 // number of host visits and the time of the first host visit. Set | |
122 // |is_http_query| to true if the URL scheme is HTTP and to false if | |
123 // the scheme is HTTPS. | |
124 void SetHostVisitsFeatures(int num_visits, | |
125 base::Time first_visit, | |
126 bool is_http_query, | |
127 ClientPhishingRequest* request); | |
128 | |
129 // Helper function which stores the request and callback while the history | |
130 // query is being processed. | |
131 void StorePendingQuery(CancelableRequestProvider::Handle handle, | |
132 ClientPhishingRequest* request, | |
133 DoneCallback* callback); | |
134 | |
135 // Helper function which is the counterpart of StorePendingQuery. If there | |
136 // is a pending query for the given handle it will return true and set both | |
137 // the request and callback pointers. Otherwise, it will return false. | |
138 bool GetPendingQuery(CancelableRequestProvider::Handle handle, | |
139 ClientPhishingRequest** request, | |
140 DoneCallback** callback); | |
141 | |
142 // Helper function which gets the history service if possible. Returns true | |
143 // if |*history| was set, and false otherwise. | |
144 bool GetHistoryService(HistoryService** history); | |
145 | |
146 TabContents* tab_; | |
147 CancelableRequestConsumer request_consumer_; | |
148 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; | |
149 | |
150 // Set of pending extractions (i.e. extractions for which ExtractFeatures was | |
151 // called but not StartExtractFeatures). | |
152 std::set<ExtractionData> pending_extractions_; | |
153 | |
154 // Set of pending queries (i.e., where history->Query...() was called but | |
155 // the history callback hasn't been invoked yet). | |
156 PendingQueriesMap pending_queries_; | |
157 | |
158 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); | |
159 }; | |
160 } // namespace safe_browsing | |
161 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ | |
OLD | NEW |