OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_classifier.h" | 5 #include "chrome/renderer/safe_browsing/phishing_classifier.h" |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
56 scorer_ = scorer; | 56 scorer_ = scorer; |
57 if (scorer_) { | 57 if (scorer_) { |
58 url_extractor_.reset(new PhishingUrlFeatureExtractor); | 58 url_extractor_.reset(new PhishingUrlFeatureExtractor); |
59 dom_extractor_.reset( | 59 dom_extractor_.reset( |
60 new PhishingDOMFeatureExtractor(render_view_, clock_.get())); | 60 new PhishingDOMFeatureExtractor(render_view_, clock_.get())); |
61 term_extractor_.reset(new PhishingTermFeatureExtractor( | 61 term_extractor_.reset(new PhishingTermFeatureExtractor( |
62 &scorer_->page_terms(), | 62 &scorer_->page_terms(), |
63 &scorer_->page_words(), | 63 &scorer_->page_words(), |
64 scorer_->max_words_per_term(), | 64 scorer_->max_words_per_term(), |
65 scorer_->murmurhash3_seed(), | 65 scorer_->murmurhash3_seed(), |
| 66 scorer_->max_shingles_per_page(), |
| 67 scorer_->shingle_size(), |
66 clock_.get())); | 68 clock_.get())); |
67 } else { | 69 } else { |
68 // We're disabling client-side phishing detection, so tear down all | 70 // We're disabling client-side phishing detection, so tear down all |
69 // of the relevant objects. | 71 // of the relevant objects. |
70 url_extractor_.reset(); | 72 url_extractor_.reset(); |
71 dom_extractor_.reset(); | 73 dom_extractor_.reset(); |
72 term_extractor_.reset(); | 74 term_extractor_.reset(); |
73 } | 75 } |
74 } | 76 } |
75 | 77 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
147 // Note that cancelling the feature extractors is simply a no-op if they | 149 // Note that cancelling the feature extractors is simply a no-op if they |
148 // were not running. | 150 // were not running. |
149 DCHECK(is_ready()); | 151 DCHECK(is_ready()); |
150 dom_extractor_->CancelPendingExtraction(); | 152 dom_extractor_->CancelPendingExtraction(); |
151 term_extractor_->CancelPendingExtraction(); | 153 term_extractor_->CancelPendingExtraction(); |
152 weak_factory_.InvalidateWeakPtrs(); | 154 weak_factory_.InvalidateWeakPtrs(); |
153 Clear(); | 155 Clear(); |
154 } | 156 } |
155 | 157 |
// PhishingClassifier::DOMExtractionFinished(bool success)
//
// Reading this view: text left of the first '|' is the OLD revision and text
// to its right is the NEW revision; rows with an empty left side exist only
// in NEW.
//
// Handles the result of the DOM feature extraction step. When |success| is
// true it starts term feature extraction over page_text_, writing into
// features_ (and, in NEW, into shingle_hashes_), with TermExtractionFinished
// as the completion callback. When |success| is false it calls
// RunFailureCallback().
156 void PhishingClassifier::DOMExtractionFinished(bool success) { | 158 void PhishingClassifier::DOMExtractionFinished(bool success) { |
// NEW only: replace shingle_hashes_ with a freshly allocated, empty
// std::set<uint32>. NOTE(review): this runs before the |success| check, so
// the set is also allocated on the failure path, where this function does
// not use it.
| 159 shingle_hashes_.reset(new std::set<uint32>); |
157 if (success) { | 160 if (success) { |
158 // Term feature extraction can take awhile, so it runs asynchronously | 161 // Term feature extraction can take awhile, so it runs asynchronously |
159 // in several chunks of work and invokes the callback when finished. | 162 // in several chunks of work and invokes the callback when finished. |
160 term_extractor_->ExtractFeatures( | 163 term_extractor_->ExtractFeatures( |
161 page_text_, | 164 page_text_, |
162 features_.get(), | 165 features_.get(), |
// NEW only: raw pointer to the set allocated above is passed as an
// additional argument; ownership stays with shingle_hashes_.
| 166 shingle_hashes_.get(), |
// NOTE(review): base::Unretained(this) binds a raw |this| into the callback.
// Presumably safe because term_extractor_ is held by this object and pending
// extraction is cancelled via CancelPendingExtraction() elsewhere in the
// file -- confirm the extractor cannot run the callback after |this| dies.
163 base::Bind(&PhishingClassifier::TermExtractionFinished, | 167 base::Bind(&PhishingClassifier::TermExtractionFinished, |
164 base::Unretained(this))); | 168 base::Unretained(this))); |
165 } else { | 169 } else { |
// DOM extraction failed: report failure instead of continuing the pipeline.
166 RunFailureCallback(); | 170 RunFailureCallback(); |
167 } | 171 } |
168 } | 172 } |
169 | 173 |
170 void PhishingClassifier::TermExtractionFinished(bool success) { | 174 void PhishingClassifier::TermExtractionFinished(bool success) { |
171 if (success) { | 175 if (success) { |
172 blink::WebView* web_view = render_view_->GetWebView(); | 176 blink::WebView* web_view = render_view_->GetWebView(); |
(...skipping 17 matching lines...) Expand all Loading... |
190 features_->features().begin(); | 194 features_->features().begin(); |
191 it != features_->features().end(); ++it) { | 195 it != features_->features().end(); ++it) { |
192 VLOG(2) << "Feature: " << it->first << " = " << it->second; | 196 VLOG(2) << "Feature: " << it->first << " = " << it->second; |
193 bool result = hashed_features.AddRealFeature( | 197 bool result = hashed_features.AddRealFeature( |
194 crypto::SHA256HashString(it->first), it->second); | 198 crypto::SHA256HashString(it->first), it->second); |
195 DCHECK(result); | 199 DCHECK(result); |
196 ClientPhishingRequest::Feature* feature = verdict.add_feature_map(); | 200 ClientPhishingRequest::Feature* feature = verdict.add_feature_map(); |
197 feature->set_name(it->first); | 201 feature->set_name(it->first); |
198 feature->set_value(it->second); | 202 feature->set_value(it->second); |
199 } | 203 } |
| 204 for (std::set<uint32>::const_iterator it = shingle_hashes_->begin(); |
| 205 it != shingle_hashes_->end(); ++it) { |
| 206 verdict.add_shingle_hashes(*it); |
| 207 } |
200 float score = static_cast<float>(scorer_->ComputeScore(hashed_features)); | 208 float score = static_cast<float>(scorer_->ComputeScore(hashed_features)); |
201 verdict.set_client_score(score); | 209 verdict.set_client_score(score); |
202 verdict.set_is_phishing(score >= kPhishyThreshold); | 210 verdict.set_is_phishing(score >= kPhishyThreshold); |
203 RunCallback(verdict); | 211 RunCallback(verdict); |
204 } else { | 212 } else { |
205 RunFailureCallback(); | 213 RunFailureCallback(); |
206 } | 214 } |
207 } | 215 } |
208 | 216 |
209 void PhishingClassifier::CheckNoPendingClassification() { | 217 void PhishingClassifier::CheckNoPendingClassification() { |
(...skipping 19 matching lines...) Expand all Loading... |
229 verdict.set_url(""); | 237 verdict.set_url(""); |
230 verdict.set_client_score(kInvalidScore); | 238 verdict.set_client_score(kInvalidScore); |
231 verdict.set_is_phishing(false); | 239 verdict.set_is_phishing(false); |
232 RunCallback(verdict); | 240 RunCallback(verdict); |
233 } | 241 } |
234 | 242 |
// PhishingClassifier::Clear()
//
// Reading this view: text left of the first '|' is the OLD revision and text
// to its right is the NEW revision; rows with an empty left side exist only
// in NEW.
//
// Drops the per-classification state held by this object: the page text
// pointer, the completion callback, the feature map and (in NEW) the set of
// shingle hashes.
235 void PhishingClassifier::Clear() { | 243 void PhishingClassifier::Clear() { |
// Only the pointer is nulled here; nothing is deleted through page_text_.
236 page_text_ = NULL; | 244 page_text_ = NULL; |
237 done_callback_.Reset(); | 245 done_callback_.Reset(); |
238 features_.reset(NULL); | 246 features_.reset(NULL); |
// NEW only: release the shingle hash set alongside features_, so it is
// cleared together with the rest of the state reset here.
| 247 shingle_hashes_.reset(NULL); |
239 } | 248 } |
240 | 249 |
241 } // namespace safe_browsing | 250 } // namespace safe_browsing |
OLD | NEW |