| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" | |
| 6 | |
| 7 #include <memory> | |
| 8 #include <set> | |
| 9 | |
| 10 #include "base/bind.h" | |
| 11 #include "base/callback.h" | |
| 12 #include "base/lazy_instance.h" | |
| 13 #include "base/logging.h" | |
| 14 #include "base/metrics/histogram_macros.h" | |
| 15 #include "chrome/common/safe_browsing/csd.pb.h" | |
| 16 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" | |
| 17 #include "chrome/renderer/safe_browsing/phishing_classifier.h" | |
| 18 #include "chrome/renderer/safe_browsing/scorer.h" | |
| 19 #include "components/safe_browsing/common/safebrowsing_messages.h" | |
| 20 #include "content/public/renderer/document_state.h" | |
| 21 #include "content/public/renderer/navigation_state.h" | |
| 22 #include "content/public/renderer/render_frame.h" | |
| 23 #include "content/public/renderer/render_thread.h" | |
| 24 #include "third_party/WebKit/public/platform/WebURL.h" | |
| 25 #include "third_party/WebKit/public/web/WebDocument.h" | |
| 26 #include "third_party/WebKit/public/web/WebLocalFrame.h" | |
| 27 #include "third_party/WebKit/public/web/WebView.h" | |
| 28 | |
| 29 using content::DocumentState; | |
| 30 using content::NavigationState; | |
| 31 using content::RenderThread; | |
| 32 | |
| 33 namespace safe_browsing { | |
| 34 | |
| 35 static GURL StripRef(const GURL& url) { | |
| 36 GURL::Replacements replacements; | |
| 37 replacements.ClearRef(); | |
| 38 return url.ReplaceComponents(replacements); | |
| 39 } | |
| 40 | |
| 41 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; | |
| 42 static base::LazyInstance<PhishingClassifierDelegates> | |
| 43 g_delegates = LAZY_INSTANCE_INITIALIZER; | |
| 44 | |
| 45 static base::LazyInstance<std::unique_ptr<const safe_browsing::Scorer>> | |
| 46 g_phishing_scorer = LAZY_INSTANCE_INITIALIZER; | |
| 47 | |
| 48 // static | |
| 49 PhishingClassifierFilter* PhishingClassifierFilter::Create() { | |
| 50 // Private constructor and public static Create() method to facilitate | |
| 51 // stubbing out this class for binary-size reduction purposes. | |
| 52 return new PhishingClassifierFilter(); | |
| 53 } | |
| 54 | |
| 55 PhishingClassifierFilter::PhishingClassifierFilter() | |
| 56 : RenderThreadObserver() {} | |
| 57 | |
| 58 PhishingClassifierFilter::~PhishingClassifierFilter() {} | |
| 59 | |
| 60 bool PhishingClassifierFilter::OnControlMessageReceived( | |
| 61 const IPC::Message& message) { | |
| 62 bool handled = true; | |
| 63 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierFilter, message) | |
| 64 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_SetPhishingModel, OnSetPhishingModel) | |
| 65 IPC_MESSAGE_UNHANDLED(handled = false) | |
| 66 IPC_END_MESSAGE_MAP() | |
| 67 return handled; | |
| 68 } | |
| 69 | |
| 70 void PhishingClassifierFilter::OnSetPhishingModel(const std::string& model) { | |
| 71 safe_browsing::Scorer* scorer = NULL; | |
| 72 // An empty model string means we should disable client-side phishing | |
| 73 // detection. | |
| 74 if (!model.empty()) { | |
| 75 scorer = safe_browsing::Scorer::Create(model); | |
| 76 if (!scorer) { | |
| 77 DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?"; | |
| 78 return; | |
| 79 } | |
| 80 } | |
| 81 PhishingClassifierDelegates::iterator i; | |
| 82 for (i = g_delegates.Get().begin(); i != g_delegates.Get().end(); ++i) { | |
| 83 (*i)->SetPhishingScorer(scorer); | |
| 84 } | |
| 85 g_phishing_scorer.Get().reset(scorer); | |
| 86 } | |
| 87 | |
| 88 // static | |
| 89 PhishingClassifierDelegate* PhishingClassifierDelegate::Create( | |
| 90 content::RenderFrame* render_frame, | |
| 91 PhishingClassifier* classifier) { | |
| 92 // Private constructor and public static Create() method to facilitate | |
| 93 // stubbing out this class for binary-size reduction purposes. | |
| 94 return new PhishingClassifierDelegate(render_frame, classifier); | |
| 95 } | |
| 96 | |
| 97 PhishingClassifierDelegate::PhishingClassifierDelegate( | |
| 98 content::RenderFrame* render_frame, | |
| 99 PhishingClassifier* classifier) | |
| 100 : content::RenderFrameObserver(render_frame), | |
| 101 last_main_frame_transition_(ui::PAGE_TRANSITION_LINK), | |
| 102 have_page_text_(false), | |
| 103 is_classifying_(false) { | |
| 104 g_delegates.Get().insert(this); | |
| 105 if (!classifier) { | |
| 106 classifier = | |
| 107 new PhishingClassifier(render_frame, new FeatureExtractorClock()); | |
| 108 } | |
| 109 | |
| 110 classifier_.reset(classifier); | |
| 111 | |
| 112 if (g_phishing_scorer.Get().get()) | |
| 113 SetPhishingScorer(g_phishing_scorer.Get().get()); | |
| 114 } | |
| 115 | |
| 116 PhishingClassifierDelegate::~PhishingClassifierDelegate() { | |
| 117 CancelPendingClassification(SHUTDOWN); | |
| 118 g_delegates.Get().erase(this); | |
| 119 } | |
| 120 | |
| 121 void PhishingClassifierDelegate::SetPhishingScorer( | |
| 122 const safe_browsing::Scorer* scorer) { | |
| 123 if (is_classifying_) { | |
| 124 // If there is a classification going on right now it means we're | |
| 125 // actually replacing an existing scorer with a new model. In | |
| 126 // this case we simply cancel the current classification. | |
| 127 // TODO(noelutz): if this happens too frequently we could also | |
| 128 // replace the old scorer with the new one once classification is done | |
| 129 // but this would complicate the code somewhat. | |
| 130 CancelPendingClassification(NEW_PHISHING_SCORER); | |
| 131 } | |
| 132 classifier_->set_phishing_scorer(scorer); | |
| 133 // Start classifying the current page if all conditions are met. | |
| 134 // See MaybeStartClassification() for details. | |
| 135 MaybeStartClassification(); | |
| 136 } | |
| 137 | |
| 138 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { | |
| 139 last_url_received_from_browser_ = StripRef(url); | |
| 140 // Start classifying the current page if all conditions are met. | |
| 141 // See MaybeStartClassification() for details. | |
| 142 MaybeStartClassification(); | |
| 143 } | |
| 144 | |
| 145 void PhishingClassifierDelegate::DidCommitProvisionalLoad( | |
| 146 bool is_new_navigation, | |
| 147 bool is_same_page_navigation) { | |
| 148 blink::WebLocalFrame* frame = render_frame()->GetWebFrame(); | |
| 149 // A new page is starting to load, so cancel classificaiton. | |
| 150 // | |
| 151 // TODO(bryner): We shouldn't need to cancel classification if the navigation | |
| 152 // is within the same page. However, if we let classification continue in | |
| 153 // this case, we need to properly deal with the fact that PageCaptured will | |
| 154 // be called again for the in-page navigation. We need to be sure not to | |
| 155 // swap out the page text while the term feature extractor is still running. | |
| 156 DocumentState* document_state = DocumentState::FromDataSource( | |
| 157 frame->dataSource()); | |
| 158 NavigationState* navigation_state = document_state->navigation_state(); | |
| 159 CancelPendingClassification(navigation_state->WasWithinSamePage() | |
| 160 ? NAVIGATE_WITHIN_PAGE | |
| 161 : NAVIGATE_AWAY); | |
| 162 if (frame->parent()) | |
| 163 return; | |
| 164 | |
| 165 last_main_frame_transition_ = navigation_state->GetTransitionType(); | |
| 166 } | |
| 167 | |
| 168 void PhishingClassifierDelegate::PageCaptured(base::string16* page_text, | |
| 169 bool preliminary_capture) { | |
| 170 if (preliminary_capture) { | |
| 171 return; | |
| 172 } | |
| 173 // Make sure there's no classification in progress. We don't want to swap | |
| 174 // out the page text string from underneath the term feature extractor. | |
| 175 // | |
| 176 // Note: Currently, if the url hasn't changed, we won't restart | |
| 177 // classification in this case. We may want to adjust this. | |
| 178 CancelPendingClassification(PAGE_RECAPTURED); | |
| 179 last_finished_load_url_ = render_frame()->GetWebFrame()->document().url(); | |
| 180 classifier_page_text_.swap(*page_text); | |
| 181 have_page_text_ = true; | |
| 182 MaybeStartClassification(); | |
| 183 } | |
| 184 | |
| 185 void PhishingClassifierDelegate::CancelPendingClassification( | |
| 186 CancelClassificationReason reason) { | |
| 187 if (is_classifying_) { | |
| 188 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.CancelClassificationReason", | |
| 189 reason, | |
| 190 CANCEL_CLASSIFICATION_MAX); | |
| 191 is_classifying_ = false; | |
| 192 } | |
| 193 if (classifier_->is_ready()) { | |
| 194 classifier_->CancelPendingClassification(); | |
| 195 } | |
| 196 classifier_page_text_.clear(); | |
| 197 have_page_text_ = false; | |
| 198 } | |
| 199 | |
| 200 bool PhishingClassifierDelegate::OnMessageReceived( | |
| 201 const IPC::Message& message) { | |
| 202 bool handled = true; | |
| 203 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) | |
| 204 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_StartPhishingDetection, | |
| 205 OnStartPhishingDetection) | |
| 206 IPC_MESSAGE_UNHANDLED(handled = false) | |
| 207 IPC_END_MESSAGE_MAP() | |
| 208 return handled; | |
| 209 } | |
| 210 | |
| 211 void PhishingClassifierDelegate::ClassificationDone( | |
| 212 const ClientPhishingRequest& verdict) { | |
| 213 // We no longer need the page text. | |
| 214 classifier_page_text_.clear(); | |
| 215 DVLOG(2) << "Phishy verdict = " << verdict.is_phishing() | |
| 216 << " score = " << verdict.client_score(); | |
| 217 if (verdict.client_score() != PhishingClassifier::kInvalidScore) { | |
| 218 DCHECK_EQ(last_url_sent_to_classifier_.spec(), verdict.url()); | |
| 219 RenderThread::Get()->Send(new SafeBrowsingHostMsg_PhishingDetectionDone( | |
| 220 routing_id(), verdict.SerializeAsString())); | |
| 221 } | |
| 222 } | |
| 223 | |
| 224 void PhishingClassifierDelegate::MaybeStartClassification() { | |
| 225 // We can begin phishing classification when the following conditions are | |
| 226 // met: | |
| 227 // 1. A Scorer has been created | |
| 228 // 2. The browser has sent a StartPhishingDetection message for the current | |
| 229 // toplevel URL. | |
| 230 // 3. The page has finished loading and the page text has been extracted. | |
| 231 // 4. The load is a new navigation (not a session history navigation). | |
| 232 // 5. The toplevel URL has not already been classified. | |
| 233 // | |
| 234 // Note that if we determine that this particular navigation should not be | |
| 235 // classified at all (as opposed to deferring it until we get an IPC or the | |
| 236 // load completes), we discard the page text since it won't be needed. | |
| 237 if (!classifier_->is_ready()) { | |
| 238 DVLOG(2) << "Not starting classification, no Scorer created."; | |
| 239 // Keep classifier_page_text_, in case a Scorer is set later. | |
| 240 return; | |
| 241 } | |
| 242 | |
| 243 if (last_main_frame_transition_ & ui::PAGE_TRANSITION_FORWARD_BACK) { | |
| 244 // Skip loads from session history navigation. However, update the | |
| 245 // last URL sent to the classifier, so that we'll properly detect | |
| 246 // in-page navigations. | |
| 247 DVLOG(2) << "Not starting classification for back/forward navigation"; | |
| 248 last_url_sent_to_classifier_ = last_finished_load_url_; | |
| 249 classifier_page_text_.clear(); // we won't need this. | |
| 250 have_page_text_ = false; | |
| 251 return; | |
| 252 } | |
| 253 | |
| 254 GURL stripped_last_load_url(StripRef(last_finished_load_url_)); | |
| 255 if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) { | |
| 256 // We've already classified this toplevel URL, so this was likely an | |
| 257 // in-page navigation or a subframe navigation. The browser should not | |
| 258 // send a StartPhishingDetection IPC in this case. | |
| 259 DVLOG(2) << "Toplevel URL is unchanged, not starting classification."; | |
| 260 classifier_page_text_.clear(); // we won't need this. | |
| 261 have_page_text_ = false; | |
| 262 return; | |
| 263 } | |
| 264 | |
| 265 if (!have_page_text_) { | |
| 266 DVLOG(2) << "Not starting classification, there is no page text ready."; | |
| 267 return; | |
| 268 } | |
| 269 | |
| 270 if (last_url_received_from_browser_ != stripped_last_load_url) { | |
| 271 // The browser has not yet confirmed that this URL should be classified, | |
| 272 // so defer classification for now. Note: the ref does not affect | |
| 273 // any of the browser's preclassification checks, so we don't require it | |
| 274 // to match. | |
| 275 DVLOG(2) << "Not starting classification, last url from browser is " | |
| 276 << last_url_received_from_browser_ << ", last finished load is " | |
| 277 << last_finished_load_url_; | |
| 278 // Keep classifier_page_text_, in case the browser notifies us later that | |
| 279 // we should classify the URL. | |
| 280 return; | |
| 281 } | |
| 282 | |
| 283 DVLOG(2) << "Starting classification for " << last_finished_load_url_; | |
| 284 last_url_sent_to_classifier_ = last_finished_load_url_; | |
| 285 is_classifying_ = true; | |
| 286 classifier_->BeginClassification( | |
| 287 &classifier_page_text_, | |
| 288 base::Bind(&PhishingClassifierDelegate::ClassificationDone, | |
| 289 base::Unretained(this))); | |
| 290 } | |
| 291 | |
| 292 void PhishingClassifierDelegate::OnDestruct() { | |
| 293 delete this; | |
| 294 } | |
| 295 | |
| 296 } // namespace safe_browsing | |
| OLD | NEW |