Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10276)

Unified Diff: chrome/renderer/safe_browsing/phishing_classifier.cc

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/renderer/safe_browsing/phishing_classifier.cc
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc
deleted file mode 100644
index a2246d52d41d8fe1287994d4d362edaa99975553..0000000000000000000000000000000000000000
--- a/chrome/renderer/safe_browsing/phishing_classifier.cc
+++ /dev/null
@@ -1,251 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/renderer/safe_browsing/phishing_classifier.h"
-
-#include <string>
-
-#include "base/bind.h"
-#include "base/callback.h"
-#include "base/compiler_specific.h"
-#include "base/location.h"
-#include "base/logging.h"
-#include "base/metrics/histogram_macros.h"
-#include "base/single_thread_task_runner.h"
-#include "base/strings/string_util.h"
-#include "base/threading/thread_task_runner_handle.h"
-#include "chrome/common/safe_browsing/csd.pb.h"
-#include "chrome/common/url_constants.h"
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
-#include "chrome/renderer/safe_browsing/features.h"
-#include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
-#include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
-#include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
-#include "chrome/renderer/safe_browsing/scorer.h"
-#include "content/public/renderer/render_frame.h"
-#include "crypto/sha2.h"
-#include "third_party/WebKit/public/platform/WebURL.h"
-#include "third_party/WebKit/public/platform/WebURLRequest.h"
-#include "third_party/WebKit/public/web/WebDataSource.h"
-#include "third_party/WebKit/public/web/WebDocument.h"
-#include "third_party/WebKit/public/web/WebLocalFrame.h"
-#include "third_party/WebKit/public/web/WebView.h"
-#include "url/gurl.h"
-
-namespace safe_browsing {
-
-const float PhishingClassifier::kInvalidScore = -1.0;  // Client score reported by RunFailureCallback().
-const float PhishingClassifier::kPhishyThreshold = 0.5;  // Scores >= this mark the verdict as phishing.
-
-namespace {
-// Buckets of the SBClientPhishing.SkipClassificationReason UMA histogram; do not reorder.
-enum SkipClassificationReason {
- CLASSIFICATION_PROCEED = 0,  // Classification went ahead (URL was http and the request was a GET).
- SKIP_HTTPS = 1,  // Recorded for every URL whose scheme is not http, not only for https.
- SKIP_NONE_GET = 2,  // A data source exists but its request method is not "GET".
- SKIP_REASON_MAX  // Boundary value passed to UMA_HISTOGRAM_ENUMERATION; not a real reason.
-};
-
-void RecordReasonForSkippingClassificationToUMA(
- SkipClassificationReason reason) {  // Logs |reason| as one sample of the histogram named below.
- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.SkipClassificationReason",
- reason,
- SKIP_REASON_MAX);
-}
-
-} // namespace
-
-PhishingClassifier::PhishingClassifier(content::RenderFrame* render_frame,
- FeatureExtractorClock* clock)
- : render_frame_(render_frame),
- scorer_(NULL),  // No scorer yet, so is_ready() is false until set_phishing_scorer() is called.
- clock_(clock),  // NOTE(review): clock_ is used via clock_.get(), so presumably an owning smart pointer - confirm in the header.
- weak_factory_(this) {
- Clear();  // Start with no pending classification state.
-}
-
-PhishingClassifier::~PhishingClassifier() {
- // The RenderView should have called CancelPendingClassification() before
- // we are destroyed; the check below DCHECKs and logs an error if it did not.
- CheckNoPendingClassification();
-}
-
-void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) {  // Installs |scorer| (may be NULL) and rebuilds or drops the extractors.
- CheckNoPendingClassification();  // Expected to be called only while no classification is pending.
- scorer_ = scorer;
- if (scorer_) {
- url_extractor_.reset(new PhishingUrlFeatureExtractor);
- dom_extractor_.reset(new PhishingDOMFeatureExtractor(clock_.get()));
- term_extractor_.reset(new PhishingTermFeatureExtractor(
- &scorer_->page_terms(),  // NOTE(review): pointers into |scorer|'s data; presumably |scorer| must outlive the extractor - confirm.
- &scorer_->page_words(),
- scorer_->max_words_per_term(),
- scorer_->murmurhash3_seed(),
- scorer_->max_shingles_per_page(),
- scorer_->shingle_size(),
- clock_.get()));
- } else {
- // We're disabling client-side phishing detection, so tear down all
- // of the relevant objects.
- url_extractor_.reset();
- dom_extractor_.reset();
- term_extractor_.reset();
- }
-}
-
-bool PhishingClassifier::is_ready() const {  // True once a non-NULL scorer has been installed.
- return scorer_ != NULL;
-}
-
-void PhishingClassifier::BeginClassification(
- const base::string16* page_text,
- const DoneCallback& done_callback) {  // Starts an asynchronous classification; |done_callback| receives the verdict.
- DCHECK(is_ready());
-
- // The RenderView should have called CancelPendingClassification() before
- // starting a new classification, so DCHECK this.
- CheckNoPendingClassification();
- // However, in an opt build, we will go ahead and clean up the pending
- // classification so that we can start in a known state.
- CancelPendingClassification();
-
- page_text_ = page_text;  // The pointer is stored, not copied; NOTE(review): presumably the caller keeps the text alive until done or cancelled.
- done_callback_ = done_callback;
-
- // For consistency, we always want to invoke the DoneCallback
- // asynchronously, rather than directly from this method. To ensure that
- // this is the case, post a task to begin feature extraction on the next
- // iteration of the message loop.
- base::ThreadTaskRunnerHandle::Get()->PostTask(
- FROM_HERE, base::Bind(&PhishingClassifier::BeginFeatureExtraction,
- weak_factory_.GetWeakPtr()));  // Bound to a weak pointer that CancelPendingClassification() invalidates.
-}
-
-void PhishingClassifier::BeginFeatureExtraction() {  // Posted by BeginClassification(); filters the page, then starts URL and DOM extraction.
- blink::WebLocalFrame* frame = render_frame_->GetWebFrame();
-
- // Check whether the URL is one that we should classify.
- // Currently, we only classify http: URLs that are GET requests.
- GURL url(frame->document().url());
- if (!url.SchemeIs(url::kHttpScheme)) {
- RecordReasonForSkippingClassificationToUMA(SKIP_HTTPS);  // Logged for every non-http scheme, not just https.
- RunFailureCallback();
- return;
- }
-
- blink::WebDataSource* ds = frame->dataSource();
- if (!ds || ds->getRequest().httpMethod().ascii() != "GET") {
- if (ds)  // A missing data source fails without recording any skip reason.
- RecordReasonForSkippingClassificationToUMA(SKIP_NONE_GET);
- RunFailureCallback();
- return;
- }
-
- RecordReasonForSkippingClassificationToUMA(CLASSIFICATION_PROCEED);
- features_.reset(new FeatureMap);
- if (!url_extractor_->ExtractFeatures(url, features_.get())) {  // URL extraction reports success or failure synchronously.
- RunFailureCallback();
- return;
- }
-
- // DOM feature extraction can take a while, so it runs asynchronously
- // in several chunks of work and invokes the callback when finished.
- dom_extractor_->ExtractFeatures(
- frame->document(), features_.get(),
- base::Bind(&PhishingClassifier::DOMExtractionFinished,
- base::Unretained(this)));  // NOTE(review): raw |this|; presumably safe because this object owns dom_extractor_ and cancels it - confirm.
-}
-
-void PhishingClassifier::CancelPendingClassification() {
- // Note that cancelling the feature extractors is simply a no-op if they
- // were not running.
- DCHECK(is_ready());  // The extractors below are null when no scorer is set (see set_phishing_scorer()).
- dom_extractor_->CancelPendingExtraction();
- term_extractor_->CancelPendingExtraction();
- weak_factory_.InvalidateWeakPtrs();  // Invalidates the weak pointer bound into the task posted by BeginClassification().
- Clear();  // Drops the stored callback without running it.
-}
-
-void PhishingClassifier::DOMExtractionFinished(bool success) {  // Completion callback handed to dom_extractor_.
- shingle_hashes_.reset(new std::set<uint32_t>);  // Allocated even on failure; Clear() (reached via RunCallback()) releases it.
- if (success) {
- // Term feature extraction can take a while, so it runs asynchronously
- // in several chunks of work and invokes the callback when finished.
- term_extractor_->ExtractFeatures(
- page_text_,
- features_.get(),
- shingle_hashes_.get(),
- base::Bind(&PhishingClassifier::TermExtractionFinished,
- base::Unretained(this)));  // NOTE(review): raw |this|; presumably safe because this object owns term_extractor_ and cancels it - confirm.
- } else {
- RunFailureCallback();
- }
-}
-
-void PhishingClassifier::TermExtractionFinished(bool success) {  // Completion callback handed to term_extractor_; last step of a classification.
- if (success) {
- blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame();
-
- // Hash all of the features so that they match the model, then compute
- // the score.
- FeatureMap hashed_features;
- ClientPhishingRequest verdict;
- verdict.set_model_version(scorer_->model_version());
- verdict.set_url(main_frame->document().url().string().utf8());
- for (base::hash_map<std::string, double>::const_iterator it =
- features_->features().begin();
- it != features_->features().end(); ++it) {
- DVLOG(2) << "Feature: " << it->first << " = " << it->second;
- bool result = hashed_features.AddRealFeature(
- crypto::SHA256HashString(it->first), it->second);  // The scorer input is keyed by the SHA-256 of the feature name.
- DCHECK(result);
- ClientPhishingRequest::Feature* feature = verdict.add_feature_map();
- feature->set_name(it->first);  // The verdict carries the unhashed feature name.
- feature->set_value(it->second);
- }
- for (std::set<uint32_t>::const_iterator it = shingle_hashes_->begin();
- it != shingle_hashes_->end(); ++it) {
- verdict.add_shingle_hashes(*it);
- }
- float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
- verdict.set_client_score(score);
- verdict.set_is_phishing(score >= kPhishyThreshold);  // The threshold comparison is inclusive.
- RunCallback(verdict);
- } else {
- RunFailureCallback();
- }
-}
-
-void PhishingClassifier::CheckNoPendingClassification() {  // DCHECKs and logs an error if a callback or page text is still stored.
- DCHECK(done_callback_.is_null());
- DCHECK(!page_text_);
- if (!done_callback_.is_null() || page_text_) {  // Same condition as the two DCHECKs above, reported through LOG(ERROR).
- LOG(ERROR) << "Classification in progress, missing call to "
- << "CancelPendingClassification";
- }
-}
-
-void PhishingClassifier::RunCallback(const ClientPhishingRequest& verdict) {  // Delivers |verdict|, then resets per-classification state.
- done_callback_.Run(verdict);  // NOTE(review): state is still set while this runs, so the callback presumably must not start a new classification.
- Clear();  // Runs only after the callback has returned.
-}
-
-void PhishingClassifier::RunFailureCallback() {  // Reports a non-phishing verdict with kInvalidScore through RunCallback().
- ClientPhishingRequest verdict;
- // In this case we're not guaranteed to have a valid URL. Just set it
- // to the empty string to make sure we have a valid protocol buffer.
- verdict.set_url("");
- verdict.set_client_score(kInvalidScore);  // Sentinel score (-1.0) used for every failure or skip.
- verdict.set_is_phishing(false);
- RunCallback(verdict);
-}
-
-void PhishingClassifier::Clear() {  // Resets per-classification state; the scorer and extractors are left untouched.
- page_text_ = NULL;  // Only the stored pointer is cleared; nothing is deleted here.
- done_callback_.Reset();
- features_.reset(NULL);
- shingle_hashes_.reset(NULL);
-}
-
-} // namespace safe_browsing
« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698