Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7755)

Unified Diff: chrome/renderer/safe_browsing/phishing_classifier.cc

Issue 268673007: Extracting page shingle hashes for similarity detection. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix a reference problem. Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/renderer/safe_browsing/phishing_classifier.cc
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc
index f48e6c138c74e444e79c58d8edb32809d2b6daa7..cc7e623673183c90659383b735f82d6588e9ef3b 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier.cc
@@ -61,8 +61,10 @@ void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) {
term_extractor_.reset(new PhishingTermFeatureExtractor(
&scorer_->page_terms(),
&scorer_->page_words(),
+ scorer_->max_hashes_per_page(),
scorer_->max_words_per_term(),
scorer_->murmurhash3_seed(),
+ scorer_->shingle_size(),
clock_.get()));
} else {
// We're disabling client-side phishing detection, so tear down all
@@ -154,12 +156,14 @@ void PhishingClassifier::CancelPendingClassification() {
}
void PhishingClassifier::DOMExtractionFinished(bool success) {
+ shingle_hashes_.reset(new std::set<uint32>);
if (success) {
// Term feature extraction can take awhile, so it runs asynchronously
// in several chunks of work and invokes the callback when finished.
term_extractor_->ExtractFeatures(
page_text_,
features_.get(),
+ shingle_hashes_.get(),
base::Bind(&PhishingClassifier::TermExtractionFinished,
base::Unretained(this)));
} else {
@@ -197,6 +201,10 @@ void PhishingClassifier::TermExtractionFinished(bool success) {
feature->set_name(it->first);
feature->set_value(it->second);
}
+ for (std::set<uint32>::const_iterator it = shingle_hashes_->begin();
+ it != shingle_hashes_->end(); ++it) {
+ verdict.add_shingle_hashes(*it);
+ }
float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
verdict.set_client_score(score);
verdict.set_is_phishing(score >= kPhishyThreshold);
@@ -236,6 +244,7 @@ void PhishingClassifier::Clear() {
page_text_ = NULL;
done_callback_.Reset();
features_.reset(NULL);
+ shingle_hashes_.reset(NULL);
}
} // namespace safe_browsing

Powered by Google App Engine
This is Rietveld 408576698