Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(671)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.h

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Second try. Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // BrowserFeatureExtractor computes various browser features for client-side
6 // phishing detection. For now it does a bunch of lookups in the history
7 // service to see whether a particular URL has been visited before by the
8 // user.
9
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
12 #pragma once
13
14 #include <map>
15 #include <set>
16 #include <utility>
17
18 #include "base/basictypes.h"
19 #include "base/callback_old.h"
20 #include "base/task.h"
21 #include "base/time.h"
22 #include "chrome/browser/history/history_types.h"
23 #include "content/browser/cancelable_request.h"
24 #include "content/common/notification_observer.h"
25 #include "content/common/notification_registrar.h"
26
27 class HistoryService;
28 class TabContents;
29
30 namespace safe_browsing {
31 class ClientPhishingRequest;
32
33 namespace features {
34
35 // TODO(noelutz): move renderer/safe_browsing/features.h to common.
36 ////////////////////////////////////////////////////
37 // History features.
38 ////////////////////////////////////////////////////
39
40 // Number of visits to that URL stored in the browser history.
41 // Should always be an integer of at least 1 because by the time
42 // we look up the history the current URL should already be stored there.
43 extern const char kUrlHistoryVisitCount[];
44
45 // Number of times the URL was typed in the Omnibox.
46 extern const char kUrlHistoryTypedCount[];
47
48 // Number of times the URL was reached by clicking a link.
49 extern const char kUrlHistoryLinkCount[];
50
51 // Number of times the URL was visited more than 24h ago.
52 extern const char kUrlHistoryVisitCountMoreThan24hAgo[];
53
54 // Number of user-visible visits to all URLs on the same host/port as
55 // the URL for HTTP and HTTPS.
56 extern const char kHttpHostVisitCount[];
57 extern const char kHttpsHostVisitCount[];
58
59 // Boolean feature which is true if the host was visited for the first
60 // time more than 24h ago (only considers user-visible visits like above).
61 extern const char kFirstHttpHostVisitMoreThan24hAgo[];
62 extern const char kFirstHttpsHostVisitMoreThan24hAgo[];
63 } // namespace features
64
65 // All methods of this class must be called on the UI thread (including
66 // the constructor).
67 class BrowserFeatureExtractor : public NotificationObserver {
68 public:
69 // Called when feature extraction is done. The first argument will be
70 // true iff feature extraction succeeded. The second argument is the
71 // phishing request which was modified by the feature extractor. The
72 // DoneCallback takes ownership of the request object.
73 typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback;
74
75 // The caller keeps ownership of the tab object and is responsible for
76 // ensuring that it stays valid for the entire lifetime of this object.
77 explicit BrowserFeatureExtractor(TabContents* tab);
78
79 // The destructor will cancel any pending requests.
80 ~BrowserFeatureExtractor();
81
82 // Begins extraction of the browser features. We take ownership
83 // of the request object until |callback| is called (see DoneCallback above)
84 // and will write the extracted features to the feature map. Once the
85 // feature extraction is complete, |callback| is run on the UI thread. We
86 // take ownership of the |callback| object. This method must run on the UI
87 // thread.
88 void ExtractFeatures(ClientPhishingRequest* request,
89 DoneCallback* callback);
90
91 private:
92 friend class DeleteTask<BrowserFeatureExtractor>;
93 typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData;
94 typedef std::map<CancelableRequestProvider::Handle,
95 ExtractionData> PendingQueriesMap;
96
97 // Actually starts feature extraction (does the real work).
98 void StartExtractFeatures(ClientPhishingRequest* request,
99 DoneCallback* callback);
100
101 // HistoryService callback which is called when we're done querying URL visits
102 // in the history.
103 void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle,
104 bool success,
105 const history::URLRow* row,
106 history::VisitVector* visits);
107
108 // HistoryService callback which is called when we're done querying HTTP host
109 // visits in the history.
110 void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle,
111 bool success,
112 int num_visits,
113 base::Time first_visit);
114
115 // HistoryService callback which is called when we're done querying HTTPS host
116 // visits in the history.
117 void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle,
118 bool success,
119 int num_visits,
120 base::Time first_visit);
121
122 // Helper function which sets the host history features given the
123 // number of host visits and the time of the first host visit. Set
124 // |is_http_query| to true if the URL scheme is HTTP and to false if
125 // the scheme is HTTPS.
126 void SetHostVisitsFeatures(int num_visits,
127 base::Time first_visit,
128 bool is_http_query,
129 ClientPhishingRequest* request);
130
131 // Helper function which stores the request and callback while the history
132 // query is being processed.
133 void StorePendingQuery(CancelableRequestProvider::Handle handle,
134 ClientPhishingRequest* request,
135 DoneCallback* callback);
136
137 // Helper function which is the counterpart of StorePendingQuery. If there
138 // is a pending query for the given handle it will return true and set both
139 // the request and callback pointers. Otherwise, it will return false.
140 bool GetPendingQuery(CancelableRequestProvider::Handle handle,
141 ClientPhishingRequest** request,
142 DoneCallback** callback);
143
144 // Helper function which gets the history service if possible. If the pointer
145 // is set it will return true and false otherwise.
146 bool GetHistoryService(HistoryService** history);
147
148 // Called when the tab goes away (in which case we cancel everything).
149 virtual void Observe(NotificationType type,
150 const NotificationSource& source,
151 const NotificationDetails& details);
152
153 void Cancel();
154
155 TabContents* tab_;
156 CancelableRequestConsumer request_consumer_;
157 ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_;
158 bool cancelled_;
159 NotificationRegistrar registrar_;
160
161 // Set of pending extractions (i.e. extractions for which ExtractFeatures was
162 // called but not StartExtractFeatures).
163 std::set<ExtractionData> pending_extractions_;
164
165 // Set of pending queries (i.e., where history->Query...() was called but
166 // the history callback hasn't been invoked yet).
167 PendingQueriesMap pending_queries_;
168
169 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor);
170 };
171 } // namespace safe_browsing
172 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698