OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" | 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" |
6 | 6 |
7 #include "base/callback.h" | 7 #include "base/callback.h" |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #include "chrome/common/render_messages.h" | 9 #include "chrome/common/render_messages.h" |
10 #include "chrome/renderer/navigation_state.h" | 10 #include "chrome/renderer/navigation_state.h" |
11 #include "chrome/renderer/render_view.h" | 11 #include "chrome/renderer/render_view.h" |
12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" | 12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" |
13 #include "chrome/renderer/safe_browsing/phishing_classifier.h" | 13 #include "chrome/renderer/safe_browsing/phishing_classifier.h" |
14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" | 15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" |
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
17 | 17 |
18 namespace safe_browsing { | 18 namespace safe_browsing { |
19 | 19 |
| 20 namespace { |
| 21 GURL StripRef(const GURL& url) { |
| 22 GURL::Replacements replacements; |
| 23 replacements.ClearRef(); |
| 24 return url.ReplaceComponents(replacements); |
| 25 } |
| 26 } |
| 27 |
20 PhishingClassifierDelegate::PhishingClassifierDelegate( | 28 PhishingClassifierDelegate::PhishingClassifierDelegate( |
21 RenderView* render_view, | 29 RenderView* render_view, |
22 PhishingClassifier* classifier) | 30 PhishingClassifier* classifier) |
23 : render_view_(render_view), | 31 : render_view_(render_view), |
24 last_page_id_sent_to_classifier_(-1), | 32 last_finished_load_id_(-1), |
25 pending_classification_(false) { | 33 last_page_id_sent_to_classifier_(-1) { |
26 if (!classifier) { | 34 if (!classifier) { |
27 classifier = new PhishingClassifier(render_view_, | 35 classifier = new PhishingClassifier(render_view_, |
28 new FeatureExtractorClock()); | 36 new FeatureExtractorClock()); |
29 } | 37 } |
30 classifier_.reset(classifier); | 38 classifier_.reset(classifier); |
31 } | 39 } |
32 | 40 |
33 PhishingClassifierDelegate::~PhishingClassifierDelegate() { | 41 PhishingClassifierDelegate::~PhishingClassifierDelegate() { |
34 CancelPendingClassification(); | 42 CancelPendingClassification(); |
35 } | 43 } |
36 | 44 |
37 void PhishingClassifierDelegate::SetPhishingScorer( | 45 void PhishingClassifierDelegate::SetPhishingScorer( |
38 const safe_browsing::Scorer* scorer) { | 46 const safe_browsing::Scorer* scorer) { |
39 classifier_->set_phishing_scorer(scorer); | 47 classifier_->set_phishing_scorer(scorer); |
| 48 // Start classifying the current page if all conditions are met. |
| 49 // See MaybeStartClassification() for details. |
| 50 MaybeStartClassification(); |
| 51 } |
40 | 52 |
41 if (pending_classification_) { | 53 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
42 pending_classification_ = false; | 54 last_url_received_from_browser_ = StripRef(url); |
43 // If we have a pending classification, it should always be true that the | 55 // Start classifying the current page if all conditions are met. |
44 // main frame URL and page id have not changed since we queued the | 56 // See MaybeStartClassification() for details. |
45 // classification. This is because we stop any pending classification on | 57 MaybeStartClassification(); |
46 // main frame loads in RenderView::didCommitProvisionalLoad(). | |
47 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); | |
48 DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_); | |
49 classifier_->BeginClassification( | |
50 &classifier_page_text_, | |
51 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
52 } | |
53 } | 58 } |
54 | 59 |
55 void PhishingClassifierDelegate::CommittedLoadInFrame( | 60 void PhishingClassifierDelegate::CommittedLoadInFrame( |
56 WebKit::WebFrame* frame) { | 61 WebKit::WebFrame* frame) { |
57 // A new page is starting to load. Unless the load is a navigation within | 62 // A new page is starting to load. Unless the load is a navigation within |
58 // the same page, we need to cancel classification since the content will | 63 // the same page, we need to cancel classification since we may get an |
59 // now be inconsistent with the phishing model. | 64 // inconsistent result. |
60 NavigationState* state = NavigationState::FromDataSource( | 65 NavigationState* state = NavigationState::FromDataSource( |
61 frame->dataSource()); | 66 frame->dataSource()); |
62 if (!state->was_within_same_page()) { | 67 if (!state->was_within_same_page()) { |
63 CancelPendingClassification(); | 68 CancelPendingClassification(); |
64 } | 69 } |
65 } | 70 } |
66 | 71 |
67 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { | 72 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { |
68 // We check that the page id has incremented so that we don't reclassify | 73 last_finished_load_id_ = render_view_->page_id(); |
69 // pages as the user moves back and forward in session history. Note: we | 74 last_finished_load_url_ = StripToplevelUrl(); |
70 // don't send every page id to the classifier, only those where the toplevel | |
71 // URL changed. | |
72 int load_id = render_view_->page_id(); | |
73 if (load_id <= last_page_id_sent_to_classifier_) { | |
74 return; | |
75 } | |
76 | |
77 GURL url_without_ref = StripToplevelUrl(); | |
78 if (url_without_ref == last_url_sent_to_classifier_) { | |
79 // The toplevel URL is the same, except for the ref. | |
80 // Update the last page id we sent, but don't trigger a new classification. | |
81 last_page_id_sent_to_classifier_ = load_id; | |
82 return; | |
83 } | |
84 | |
85 last_url_sent_to_classifier_ = url_without_ref; | |
86 last_page_id_sent_to_classifier_ = load_id; | |
87 classifier_page_text_.swap(*page_text); | 75 classifier_page_text_.swap(*page_text); |
88 | 76 MaybeStartClassification(); |
89 if (classifier_->is_ready()) { | |
90 classifier_->BeginClassification( | |
91 &classifier_page_text_, | |
92 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
93 } else { | |
94 // If there is no phishing classifier yet, we'll begin classification once | |
95 // SetPhishingScorer() is called by the RenderView. | |
96 pending_classification_ = true; | |
97 } | |
98 } | 77 } |
99 | 78 |
100 void PhishingClassifierDelegate::CancelPendingClassification() { | 79 void PhishingClassifierDelegate::CancelPendingClassification() { |
101 if (classifier_->is_ready()) { | 80 if (classifier_->is_ready()) { |
102 classifier_->CancelPendingClassification(); | 81 classifier_->CancelPendingClassification(); |
103 } | 82 } |
104 classifier_page_text_.clear(); | 83 classifier_page_text_.clear(); |
105 pending_classification_ = false; | |
106 } | 84 } |
107 | 85 |
108 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, | 86 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
109 double phishy_score) { | 87 double phishy_score) { |
110 // We no longer need the page text. | 88 // We no longer need the page text. |
111 classifier_page_text_.clear(); | 89 classifier_page_text_.clear(); |
112 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; | 90 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; |
113 if (!is_phishy) { | 91 if (!is_phishy) { |
114 return; | 92 return; |
115 } | 93 } |
116 | 94 |
117 render_view_->Send(new ViewHostMsg_DetectedPhishingSite( | 95 render_view_->Send(new ViewHostMsg_DetectedPhishingSite( |
118 render_view_->routing_id(), | 96 render_view_->routing_id(), |
119 last_url_sent_to_classifier_, | 97 last_url_sent_to_classifier_, |
120 phishy_score)); | 98 phishy_score)); |
121 } | 99 } |
122 | 100 |
123 GURL PhishingClassifierDelegate::StripToplevelUrl() { | 101 GURL PhishingClassifierDelegate::StripToplevelUrl() { |
124 GURL toplevel_url = render_view_->webview()->mainFrame()->url(); | 102 return StripRef(render_view_->webview()->mainFrame()->url()); |
125 GURL::Replacements replacements; | 103 } |
126 replacements.ClearRef(); | 104 |
127 return toplevel_url.ReplaceComponents(replacements); | 105 void PhishingClassifierDelegate::MaybeStartClassification() { |
| 106 // We can begin phishing classification when the following conditions are |
| 107 // met: |
| 108 // 1. A Scorer has been created |
| 109 // 2. The browser has sent a StartPhishingDetection message for the current |
| 110 // toplevel URL. |
| 111 // 3. The page has finished loading and the page text has been extracted. |
| 112 // 4. The load is a new navigation (not a session history navigation). |
| 113 // 5. The toplevel URL has not already been classified. |
| 114 if (!classifier_->is_ready()) { |
| 115 VLOG(2) << "Not starting classification, no Scorer created."; |
| 116 // Keep classifier_page_text_, in case a Scorer is set later. |
| 117 return; |
| 118 } |
| 119 |
| 120 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { |
| 121 // Skip loads from session history navigation. |
| 122 VLOG(2) << "Not starting classification, last finished load id is " |
| 123 << last_finished_load_id_ << " but we have classified up to " |
| 124 << "load id " << last_page_id_sent_to_classifier_; |
| 125 classifier_page_text_.clear(); // we won't need this. |
| 126 return; |
| 127 } |
| 128 |
| 129 if (last_finished_load_id_ != render_view_->page_id()) { |
| 130 VLOG(2) << "Render view page has changed, not starting classification"; |
| 131 classifier_page_text_.clear(); // we won't need this. |
| 132 return; |
| 133 } |
| 134 // If the page id is unchanged, the toplevel URL should also be unchanged. |
| 135 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); |
| 136 |
| 137 if (last_url_received_from_browser_ != last_finished_load_url_) { |
| 138 VLOG(2) << "Not starting classification, last url from browser is " |
| 139 << last_url_received_from_browser_ << ", last finished load is " |
| 140 << last_finished_load_url_; |
| 141 // Keep classifier_page_text_, in case the browser notifies us later that |
| 142 // we should classify the URL. |
| 143 return; |
| 144 } |
| 145 |
| 146 if (last_finished_load_url_ == last_url_sent_to_classifier_) { |
| 147 // We've already classified this toplevel URL, so this was likely an |
| 148 // in-page navigation or a subframe navigation. Don't classify the page a |
| 149 // second time, but update the last classified page id for the session |
| 150 // history check above. |
| 151 VLOG(2) << "Toplevel URL is unchanged, not starting classification " |
| 152 << "but updating last classified page id to " |
| 153 << last_finished_load_id_; |
| 154 last_page_id_sent_to_classifier_ = last_finished_load_id_; |
| 155 classifier_page_text_.clear(); // we won't need this. |
| 156 return; |
| 157 } |
| 158 |
| 159 VLOG(2) << "Starting classification for " << last_finished_load_url_; |
| 160 last_url_sent_to_classifier_ = last_finished_load_url_; |
| 161 last_page_id_sent_to_classifier_ = last_finished_load_id_; |
| 162 classifier_->BeginClassification( |
| 163 &classifier_page_text_, |
| 164 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
128 } | 165 } |
129 | 166 |
130 } // namespace safe_browsing | 167 } // namespace safe_browsing |
OLD | NEW |