OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" | 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" |
6 | 6 |
7 #include <set> | 7 #include <set> |
8 | 8 |
9 #include "base/callback.h" | 9 #include "base/callback.h" |
10 #include "base/lazy_instance.h" | 10 #include "base/lazy_instance.h" |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/scoped_callback_factory.h" | 12 #include "base/scoped_callback_factory.h" |
13 #include "chrome/common/render_messages.h" | 13 #include "chrome/common/render_messages.h" |
14 #include "chrome/renderer/navigation_state.h" | 14 #include "chrome/renderer/navigation_state.h" |
15 #include "chrome/renderer/render_thread.h" | 15 #include "chrome/renderer/render_thread.h" |
16 #include "chrome/renderer/render_view.h" | 16 #include "chrome/renderer/render_view.h" |
17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" | 17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" |
18 #include "chrome/renderer/safe_browsing/phishing_classifier.h" | 18 #include "chrome/renderer/safe_browsing/phishing_classifier.h" |
19 #include "chrome/renderer/safe_browsing/scorer.h" | 19 #include "chrome/renderer/safe_browsing/scorer.h" |
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" | 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" |
22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
23 | 23 |
24 namespace safe_browsing { | 24 namespace safe_browsing { |
25 | 25 |
| 26 |
| 27 static GURL StripRef(const GURL& url) { |
| 28 GURL::Replacements replacements; |
| 29 replacements.ClearRef(); |
| 30 return url.ReplaceComponents(replacements); |
| 31 } |
| 32 |
26 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; | 33 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; |
27 static base::LazyInstance<PhishingClassifierDelegates> | 34 static base::LazyInstance<PhishingClassifierDelegates> |
28 g_delegates(base::LINKER_INITIALIZED); | 35 g_delegates(base::LINKER_INITIALIZED); |
29 | 36 |
30 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> > | 37 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> > |
31 g_phishing_scorer(base::LINKER_INITIALIZED); | 38 g_phishing_scorer(base::LINKER_INITIALIZED); |
32 | 39 |
33 class ScorerCallback { | 40 class ScorerCallback { |
34 public: | 41 public: |
35 static Scorer::CreationCallback* CreateCallback() { | 42 static Scorer::CreationCallback* CreateCallback() { |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
68 safe_browsing::Scorer::CreateFromFile( | 75 safe_browsing::Scorer::CreateFromFile( |
69 IPC::PlatformFileForTransitToPlatformFile(model_file), | 76 IPC::PlatformFileForTransitToPlatformFile(model_file), |
70 RenderThread::current()->GetFileThreadMessageLoopProxy(), | 77 RenderThread::current()->GetFileThreadMessageLoopProxy(), |
71 ScorerCallback::CreateCallback()); | 78 ScorerCallback::CreateCallback()); |
72 } | 79 } |
73 | 80 |
74 PhishingClassifierDelegate::PhishingClassifierDelegate( | 81 PhishingClassifierDelegate::PhishingClassifierDelegate( |
75 RenderView* render_view, | 82 RenderView* render_view, |
76 PhishingClassifier* classifier) | 83 PhishingClassifier* classifier) |
77 : RenderViewObserver(render_view), | 84 : RenderViewObserver(render_view), |
78 last_page_id_sent_to_classifier_(-1), | 85 last_finished_load_id_(-1), |
79 pending_classification_(false) { | 86 last_page_id_sent_to_classifier_(-1) { |
80 g_delegates.Get().insert(this); | 87 g_delegates.Get().insert(this); |
81 if (!classifier) { | 88 if (!classifier) { |
82 classifier = new PhishingClassifier(render_view, | 89 classifier = new PhishingClassifier(render_view, |
83 new FeatureExtractorClock()); | 90 new FeatureExtractorClock()); |
84 } | 91 } |
85 | 92 |
86 classifier_.reset(classifier); | 93 classifier_.reset(classifier); |
87 | 94 |
88 if (g_phishing_scorer.Get().get()) | 95 if (g_phishing_scorer.Get().get()) |
89 SetPhishingScorer(g_phishing_scorer.Get().get()); | 96 SetPhishingScorer(g_phishing_scorer.Get().get()); |
90 } | 97 } |
91 | 98 |
92 PhishingClassifierDelegate::~PhishingClassifierDelegate() { | 99 PhishingClassifierDelegate::~PhishingClassifierDelegate() { |
93 CancelPendingClassification(); | 100 CancelPendingClassification(); |
94 g_delegates.Get().erase(this); | 101 g_delegates.Get().erase(this); |
95 } | 102 } |
96 | 103 |
97 void PhishingClassifierDelegate::SetPhishingScorer( | 104 void PhishingClassifierDelegate::SetPhishingScorer( |
98 const safe_browsing::Scorer* scorer) { | 105 const safe_browsing::Scorer* scorer) { |
99 if (!render_view()->webview()) | 106 if (!render_view()->webview()) |
100 return; // RenderView is tearing down. | 107 return; // RenderView is tearing down. |
101 | 108 |
102 classifier_->set_phishing_scorer(scorer); | 109 classifier_->set_phishing_scorer(scorer); |
| 110 // Start classifying the current page if all conditions are met. |
| 111 // See MaybeStartClassification() for details. |
| 112 MaybeStartClassification(); |
| 113 } |
103 | 114 |
104 if (pending_classification_) { | 115 |
105 pending_classification_ = false; | 116 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
106 // If we have a pending classificaton, it should always be true that the | 117 last_url_received_from_browser_ = StripRef(url); |
107 // main frame URL and page id have not changed since we queued the | 118 // Start classifying the current page if all conditions are met. |
108 // classification. This is because we stop any pending classification on | 119 // See MaybeStartClassification() for details. |
109 // main frame loads in RenderView::didCommitProvisionalLoad(). | 120 MaybeStartClassification(); |
110 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); | |
111 DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_); | |
112 classifier_->BeginClassification( | |
113 &classifier_page_text_, | |
114 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
115 } | |
116 } | 121 } |
117 | 122 |
118 void PhishingClassifierDelegate::DidCommitProvisionalLoad( | 123 void PhishingClassifierDelegate::DidCommitProvisionalLoad( |
119 WebKit::WebFrame* frame, bool is_new_navigation) { | 124 WebKit::WebFrame* frame, bool is_new_navigation) { |
120 // A new page is starting to load. Unless the load is a navigation within | 125 // A new page is starting to load. Unless the load is a navigation within |
121 // the same page, we need to cancel classification since the content will | 126 // the same page, we need to cancel classification since we may get an |
122 // now be inconsistent with the phishing model. | 127 // inconsistent result. |
123 NavigationState* state = NavigationState::FromDataSource( | 128 NavigationState* state = NavigationState::FromDataSource( |
124 frame->dataSource()); | 129 frame->dataSource()); |
125 if (!state->was_within_same_page()) { | 130 if (!state->was_within_same_page()) { |
126 CancelPendingClassification(); | 131 CancelPendingClassification(); |
127 } | 132 } |
128 } | 133 } |
129 | 134 |
130 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) { | 135 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) { |
131 // We check that the page id has incremented so that we don't reclassify | 136 last_finished_load_id_ = render_view()->page_id(); |
132 // pages as the user moves back and forward in session history. Note: we | 137 last_finished_load_url_ = StripToplevelUrl(); |
133 // don't send every page id to the classifier, only those where the toplevel | |
134 // URL changed. | |
135 int load_id = render_view()->page_id(); | |
136 if (load_id <= last_page_id_sent_to_classifier_) { | |
137 return; | |
138 } | |
139 | |
140 GURL url_without_ref = StripToplevelUrl(); | |
141 if (url_without_ref == last_url_sent_to_classifier_) { | |
142 // The toplevle URL is the same, except for the ref. | |
143 // Update the last page id we sent, but don't trigger a new classification. | |
144 last_page_id_sent_to_classifier_ = load_id; | |
145 return; | |
146 } | |
147 | |
148 last_url_sent_to_classifier_ = url_without_ref; | |
149 last_page_id_sent_to_classifier_ = load_id; | |
150 classifier_page_text_ = page_text; | 138 classifier_page_text_ = page_text; |
151 | 139 MaybeStartClassification(); |
152 if (classifier_->is_ready()) { | |
153 classifier_->BeginClassification( | |
154 &classifier_page_text_, | |
155 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
156 } else { | |
157 // If there is no phishing classifier yet, we'll begin classification once | |
158 // SetPhishingScorer() is called by the RenderView. | |
159 pending_classification_ = true; | |
160 } | |
161 } | 140 } |
162 | 141 |
163 void PhishingClassifierDelegate::CancelPendingClassification() { | 142 void PhishingClassifierDelegate::CancelPendingClassification() { |
164 if (classifier_->is_ready()) { | 143 if (classifier_->is_ready()) { |
165 classifier_->CancelPendingClassification(); | 144 classifier_->CancelPendingClassification(); |
166 } | 145 } |
167 classifier_page_text_.clear(); | 146 classifier_page_text_.clear(); |
168 pending_classification_ = false; | |
169 } | 147 } |
170 | 148 |
171 bool PhishingClassifierDelegate::OnMessageReceived( | 149 bool PhishingClassifierDelegate::OnMessageReceived( |
172 const IPC::Message& message) { | 150 const IPC::Message& message) { |
173 /* | |
174 bool handled = true; | 151 bool handled = true; |
175 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) | 152 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) |
| 153 IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection, |
| 154 OnStartPhishingDetection) |
176 IPC_MESSAGE_UNHANDLED(handled = false) | 155 IPC_MESSAGE_UNHANDLED(handled = false) |
177 IPC_END_MESSAGE_MAP() | 156 IPC_END_MESSAGE_MAP() |
178 return handled; | 157 return handled; |
179 */ | |
180 return false; | |
181 } | 158 } |
182 | 159 |
183 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, | 160 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
184 double phishy_score) { | 161 double phishy_score) { |
185 // We no longer need the page text. | 162 // We no longer need the page text. |
186 classifier_page_text_.clear(); | 163 classifier_page_text_.clear(); |
187 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; | 164 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; |
188 if (!is_phishy) { | 165 if (!is_phishy) { |
189 return; | 166 return; |
190 } | 167 } |
191 | 168 |
192 render_view()->Send(new ViewHostMsg_DetectedPhishingSite( | 169 render_view()->Send(new ViewHostMsg_DetectedPhishingSite( |
193 render_view()->routing_id(), | 170 render_view()->routing_id(), |
194 last_url_sent_to_classifier_, | 171 last_url_sent_to_classifier_, |
195 phishy_score)); | 172 phishy_score)); |
196 } | 173 } |
197 | 174 |
198 GURL PhishingClassifierDelegate::StripToplevelUrl() { | 175 GURL PhishingClassifierDelegate::StripToplevelUrl() { |
199 GURL toplevel_url = render_view()->webview()->mainFrame()->url(); | 176 return StripRef(render_view()->webview()->mainFrame()->url()); |
200 GURL::Replacements replacements; | 177 } |
201 replacements.ClearRef(); | 178 |
202 return toplevel_url.ReplaceComponents(replacements); | 179 void PhishingClassifierDelegate::MaybeStartClassification() { |
| 180 // We can begin phishing classification when the following conditions are |
| 181 // met: |
| 182 // 1. A Scorer has been created |
| 183 // 2. The browser has sent a StartPhishingDetection message for the current |
| 184 // toplevel URL. |
| 185 // 3. The page has finished loading and the page text has been extracted. |
| 186 // 4. The load is a new navigation (not a session history navigation). |
| 187 // 5. The toplevel URL has not already been classified. |
| 188 // |
| 189 // Note that if we determine that this particular navigation should not be |
| 190 // classified at all (as opposed to deferring it until we get an IPC or the |
| 191 // load completes), we discard the page text since it won't be needed. |
| 192 if (!classifier_->is_ready()) { |
| 193 VLOG(2) << "Not starting classification, no Scorer created."; |
| 194 // Keep classifier_page_text_, in case a Scorer is set later. |
| 195 return; |
| 196 } |
| 197 |
| 198 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { |
| 199 // Skip loads from session history navigation. |
| 200 VLOG(2) << "Not starting classification, last finished load id is " |
| 201 << last_finished_load_id_ << " but we have classified up to " |
| 202 << "load id " << last_page_id_sent_to_classifier_; |
| 203 classifier_page_text_.clear(); // we won't need this. |
| 204 return; |
| 205 } |
| 206 |
| 207 if (last_finished_load_id_ != render_view()->page_id()) { |
| 208 VLOG(2) << "Render view page has changed, not starting classification"; |
| 209 classifier_page_text_.clear(); // we won't need this. |
| 210 return; |
| 211 } |
| 212 // If the page id is unchanged, the toplevel URL should also be unchanged. |
| 213 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); |
| 214 |
| 215 if (last_finished_load_url_ == last_url_sent_to_classifier_) { |
| 216 // We've already classified this toplevel URL, so this was likely an |
| 217 // in-page navigation or a subframe navigation. The browser should not |
| 218 // send a StartPhishingDetection IPC in this case. |
| 219 VLOG(2) << "Toplevel URL is unchanged, not starting classification."; |
| 220 classifier_page_text_.clear(); // we won't need this. |
| 221 return; |
| 222 } |
| 223 |
| 224 if (last_url_received_from_browser_ != last_finished_load_url_) { |
| 225 // The browser has not yet confirmed that this URL should be classified, |
| 226 // so defer classification for now. |
| 227 VLOG(2) << "Not starting classification, last url from browser is " |
| 228 << last_url_received_from_browser_ << ", last finished load is " |
| 229 << last_finished_load_url_; |
| 230 // Keep classifier_page_text_, in case the browser notifies us later that |
| 231 // we should classify the URL. |
| 232 return; |
| 233 } |
| 234 |
| 235 VLOG(2) << "Starting classification for " << last_finished_load_url_; |
| 236 last_url_sent_to_classifier_ = last_finished_load_url_; |
| 237 last_page_id_sent_to_classifier_ = last_finished_load_id_; |
| 238 classifier_->BeginClassification( |
| 239 &classifier_page_text_, |
| 240 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
203 } | 241 } |
204 | 242 |
205 } // namespace safe_browsing | 243 } // namespace safe_browsing |
OLD | NEW |