chrome/renderer/safe_browsing/phishing_classifier.cc - Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.

Unified Diff: chrome/renderer/safe_browsing/phishing_classifier.cc

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/renderer/safe_browsing/phishing_classifier.cc

diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc

deleted file mode 100644

index a2246d52d41d8fe1287994d4d362edaa99975553..0000000000000000000000000000000000000000

--- a/chrome/renderer/safe_browsing/phishing_classifier.cc

+++ /dev/null

@@ -1,251 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "chrome/renderer/safe_browsing/phishing_classifier.h"

-#include <string>

-#include "base/bind.h"

-#include "base/callback.h"

-#include "base/compiler_specific.h"

-#include "base/location.h"

-#include "base/logging.h"

-#include "base/metrics/histogram_macros.h"

-#include "base/single_thread_task_runner.h"

-#include "base/strings/string_util.h"

-#include "base/threading/thread_task_runner_handle.h"

-#include "chrome/common/safe_browsing/csd.pb.h"

-#include "chrome/common/url_constants.h"

-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"

-#include "chrome/renderer/safe_browsing/features.h"

-#include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"

-#include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"

-#include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"

-#include "chrome/renderer/safe_browsing/scorer.h"

-#include "content/public/renderer/render_frame.h"

-#include "crypto/sha2.h"

-#include "third_party/WebKit/public/platform/WebURL.h"

-#include "third_party/WebKit/public/platform/WebURLRequest.h"

-#include "third_party/WebKit/public/web/WebDataSource.h"

-#include "third_party/WebKit/public/web/WebDocument.h"

-#include "third_party/WebKit/public/web/WebLocalFrame.h"

-#include "third_party/WebKit/public/web/WebView.h"

-#include "url/gurl.h"

-namespace safe_browsing {

-const float PhishingClassifier::kInvalidScore = -1.0;

-const float PhishingClassifier::kPhishyThreshold = 0.5;

-namespace {

-// Used for UMA, do not reorder.

-enum SkipClassificationReason {

- CLASSIFICATION_PROCEED = 0,

- SKIP_HTTPS = 1,

- SKIP_NONE_GET = 2,

- SKIP_REASON_MAX

-};

-void RecordReasonForSkippingClassificationToUMA(

- SkipClassificationReason reason) {

- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.SkipClassificationReason",

- reason,

- SKIP_REASON_MAX);

-}

-} // namespace

-PhishingClassifier::PhishingClassifier(content::RenderFrame* render_frame,

- FeatureExtractorClock* clock)

- : render_frame_(render_frame),

- scorer_(NULL),

- clock_(clock),

- weak_factory_(this) {

- Clear();

-}

-PhishingClassifier::~PhishingClassifier() {

- // The RenderView should have called CancelPendingClassification() before

- // we are destroyed.

- CheckNoPendingClassification();

-}

-void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) {

- CheckNoPendingClassification();

- scorer_ = scorer;

- if (scorer_) {

- url_extractor_.reset(new PhishingUrlFeatureExtractor);

- dom_extractor_.reset(new PhishingDOMFeatureExtractor(clock_.get()));

- term_extractor_.reset(new PhishingTermFeatureExtractor(

- &scorer_->page_terms(),

- &scorer_->page_words(),

- scorer_->max_words_per_term(),

- scorer_->murmurhash3_seed(),

- scorer_->max_shingles_per_page(),

- scorer_->shingle_size(),

- clock_.get()));

- } else {

- // We're disabling client-side phishing detection, so tear down all

- // of the relevant objects.

- url_extractor_.reset();

- dom_extractor_.reset();

- term_extractor_.reset();

- }

-}

-bool PhishingClassifier::is_ready() const {

- return scorer_ != NULL;

-}

-void PhishingClassifier::BeginClassification(

- const base::string16* page_text,

- const DoneCallback& done_callback) {

- DCHECK(is_ready());

- // The RenderView should have called CancelPendingClassification() before

- // starting a new classification, so DCHECK this.

- CheckNoPendingClassification();

- // However, in an opt build, we will go ahead and clean up the pending

- // classification so that we can start in a known state.

- CancelPendingClassification();

- page_text_ = page_text;

- done_callback_ = done_callback;

- // For consistency, we always want to invoke the DoneCallback

- // asynchronously, rather than directly from this method. To ensure that

- // this is the case, post a task to begin feature extraction on the next

- // iteration of the message loop.

- base::ThreadTaskRunnerHandle::Get()->PostTask(

- FROM_HERE, base::Bind(&PhishingClassifier::BeginFeatureExtraction,

- weak_factory_.GetWeakPtr()));

-}

-void PhishingClassifier::BeginFeatureExtraction() {

- blink::WebLocalFrame* frame = render_frame_->GetWebFrame();

- // Check whether the URL is one that we should classify.

- // Currently, we only classify http: URLs that are GET requests.

- GURL url(frame->document().url());

- if (!url.SchemeIs(url::kHttpScheme)) {

- RecordReasonForSkippingClassificationToUMA(SKIP_HTTPS);

- RunFailureCallback();

- return;

- }

- blink::WebDataSource* ds = frame->dataSource();

- if (!ds || ds->getRequest().httpMethod().ascii() != "GET") {

- if (ds)

- RecordReasonForSkippingClassificationToUMA(SKIP_NONE_GET);

- RunFailureCallback();

- return;

- }

- RecordReasonForSkippingClassificationToUMA(CLASSIFICATION_PROCEED);

- features_.reset(new FeatureMap);

- if (!url_extractor_->ExtractFeatures(url, features_.get())) {

- RunFailureCallback();

- return;

- }

- // DOM feature extraction can take awhile, so it runs asynchronously

- // in several chunks of work and invokes the callback when finished.

- dom_extractor_->ExtractFeatures(

- frame->document(), features_.get(),

- base::Bind(&PhishingClassifier::DOMExtractionFinished,

- base::Unretained(this)));

-}

-void PhishingClassifier::CancelPendingClassification() {

- // Note that cancelling the feature extractors is simply a no-op if they

- // were not running.

- DCHECK(is_ready());

- dom_extractor_->CancelPendingExtraction();

- term_extractor_->CancelPendingExtraction();

- weak_factory_.InvalidateWeakPtrs();

- Clear();

-}

-void PhishingClassifier::DOMExtractionFinished(bool success) {

- shingle_hashes_.reset(new std::set<uint32_t>);

- if (success) {

- // Term feature extraction can take awhile, so it runs asynchronously

- // in several chunks of work and invokes the callback when finished.

- term_extractor_->ExtractFeatures(

- page_text_,

- features_.get(),

- shingle_hashes_.get(),

- base::Bind(&PhishingClassifier::TermExtractionFinished,

- base::Unretained(this)));

- } else {

- RunFailureCallback();

- }

-}

-void PhishingClassifier::TermExtractionFinished(bool success) {

- if (success) {

- blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame();

- // Hash all of the features so that they match the model, then compute

- // the score.

- FeatureMap hashed_features;

- ClientPhishingRequest verdict;

- verdict.set_model_version(scorer_->model_version());

- verdict.set_url(main_frame->document().url().string().utf8());

- for (base::hash_map<std::string, double>::const_iterator it =

- features_->features().begin();

- it != features_->features().end(); ++it) {

- DVLOG(2) << "Feature: " << it->first << " = " << it->second;

- bool result = hashed_features.AddRealFeature(

- crypto::SHA256HashString(it->first), it->second);

- DCHECK(result);

- ClientPhishingRequest::Feature* feature = verdict.add_feature_map();

- feature->set_name(it->first);

- feature->set_value(it->second);

- }

- for (std::set<uint32_t>::const_iterator it = shingle_hashes_->begin();

- it != shingle_hashes_->end(); ++it) {

- verdict.add_shingle_hashes(*it);

- }

- float score = static_cast<float>(scorer_->ComputeScore(hashed_features));

- verdict.set_client_score(score);

- verdict.set_is_phishing(score >= kPhishyThreshold);

- RunCallback(verdict);

- } else {

- RunFailureCallback();

- }

-}

-void PhishingClassifier::CheckNoPendingClassification() {

- DCHECK(done_callback_.is_null());

- DCHECK(!page_text_);

- if (!done_callback_.is_null() || page_text_) {

- LOG(ERROR) << "Classification in progress, missing call to "

- << "CancelPendingClassification";

- }

-}

-void PhishingClassifier::RunCallback(const ClientPhishingRequest& verdict) {

- done_callback_.Run(verdict);

- Clear();

-}

-void PhishingClassifier::RunFailureCallback() {

- ClientPhishingRequest verdict;

- // In this case we're not guaranteed to have a valid URL. Just set it

- // to the empty string to make sure we have a valid protocol buffer.

- verdict.set_url("");

- verdict.set_client_score(kInvalidScore);

- verdict.set_is_phishing(false);

- RunCallback(verdict);

-}

-void PhishingClassifier::Clear() {

- page_text_ = NULL;

- done_callback_.Reset();

- features_.reset(NULL);

- shingle_hashes_.reset(NULL);

-}

-} // namespace safe_browsing