| Index: chrome/renderer/safe_browsing/phishing_classifier.h
|
| diff --git a/chrome/renderer/safe_browsing/phishing_classifier.h b/chrome/renderer/safe_browsing/phishing_classifier.h
|
| deleted file mode 100644
|
| index 13c4093ce13510d8b3e7a4b4ce5105fe2cbedd04..0000000000000000000000000000000000000000
|
| --- a/chrome/renderer/safe_browsing/phishing_classifier.h
|
| +++ /dev/null
|
| @@ -1,155 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -//
|
| -// This class handles the process of extracting all of the features from a
|
| -// page and computing a phishyness score. The basic steps are:
|
| -// - Run each feature extractor over the page, building up a FeatureMap of
|
| -// feature -> value.
|
| -// - SHA-256 hash all of the feature names in the map so that they match the
|
| -// supplied model.
|
| -// - Hand the hashed map off to a Scorer, which computes the probability that
|
| -// the page is phishy.
|
| -// - If the page is phishy, run the supplied callback.
|
| -//
|
| -// For more details, see phishing_*_feature_extractor.h, scorer.h, and
|
| -// client_model.proto.
|
| -
|
| -#ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_
|
| -#define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_
|
| -
|
| -#include <stdint.h>
|
| -
|
| -#include <memory>
|
| -#include <set>
|
| -
|
| -#include "base/callback.h"
|
| -#include "base/macros.h"
|
| -#include "base/memory/weak_ptr.h"
|
| -#include "base/strings/string16.h"
|
| -
|
| -namespace content {
|
| -class RenderFrame;
|
| -}
|
| -
|
| -namespace safe_browsing {
|
| -class ClientPhishingRequest;
|
| -class FeatureExtractorClock;
|
| -class FeatureMap;
|
| -class PhishingDOMFeatureExtractor;
|
| -class PhishingTermFeatureExtractor;
|
| -class PhishingUrlFeatureExtractor;
|
| -class Scorer;
|
| -
|
| -class PhishingClassifier {
|
| - public:
|
| - // Callback to be run when phishing classification finishes. The verdict
|
| - // is a ClientPhishingRequest which contains the verdict computed by the
|
| - // classifier as well as the extracted features. If the verdict.is_phishing()
|
| - // is true, the page is considered phishy by the client-side model,
|
| - // and the browser should ping back to get a final verdict. The
|
| - // verdict.client_score() is set to kInvalidScore if classification failed.
|
| - typedef base::Callback<void(const ClientPhishingRequest& /* verdict */)>
|
| - DoneCallback;
|
| -
|
| - static const float kInvalidScore;
|
| -
|
| - // Creates a new PhishingClassifier object that will operate on
|
| - // |render_view|. |clock| is used to time feature extractor operations, and
|
| - // the PhishingClassifier takes ownership of this object. Note that the
|
| - // classifier will not be 'ready' until set_phishing_scorer() is called.
|
| - PhishingClassifier(content::RenderFrame* render_frame,
|
| - FeatureExtractorClock* clock);
|
| - virtual ~PhishingClassifier();
|
| -
|
| - // Sets a scorer for the classifier to use in computing the phishiness score.
|
| - // This must live at least as long as the PhishingClassifier. The caller is
|
| - // expected to cancel any pending classification before setting a phishing
|
| - // scorer.
|
| - void set_phishing_scorer(const Scorer* scorer);
|
| -
|
| - // Returns true if the classifier is ready to classify pages, i.e. it
|
| - // has had a scorer set via set_phishing_scorer().
|
| - bool is_ready() const;
|
| -
|
| - // Called by the RenderView when a page has finished loading. This begins
|
| - // the feature extraction and scoring process. |page_text| should contain
|
| - // the plain text of a web page, including any subframes, as returned by
|
| - // RenderView::CaptureText(). |page_text| is owned by the caller, and must
|
| - // not be destroyed until either |done_callback| is run or
|
| - // CancelPendingClassification() is called.
|
| - //
|
| - // To avoid blocking the render thread for too long, phishing classification
|
| - // may run in several chunks of work, posting a task to the current
|
| - // MessageLoop to continue processing. Once the scoring process is complete,
|
| - // |done_callback| is run on the current thread. PhishingClassifier takes
|
| - // ownership of the callback.
|
| - //
|
| - // It is an error to call BeginClassification if the classifier is not yet
|
| - // ready.
|
| - virtual void BeginClassification(const base::string16* page_text,
|
| - const DoneCallback& callback);
|
| -
|
| - // Called by the RenderView (on the render thread) when a page is unloading
|
| - // or the RenderView is being destroyed. This cancels any extraction that
|
| - // is in progress. It is an error to call CancelPendingClassification if
|
| - // the classifier is not yet ready.
|
| - virtual void CancelPendingClassification();
|
| -
|
| - private:
|
| - // Any score equal to or above this value is considered phishy.
|
| - static const float kPhishyThreshold;
|
| -
|
| - // Begins the feature extraction process, by extracting URL features and
|
| - // beginning DOM feature extraction.
|
| - void BeginFeatureExtraction();
|
| -
|
| - // Callback to be run when DOM feature extraction is complete.
|
| - // If it was successful, begins term feature extraction, otherwise
|
| - // runs the DoneCallback with a non-phishy verdict.
|
| - void DOMExtractionFinished(bool success);
|
| -
|
| - // Callback to be run when term feature extraction is complete.
|
| - // If it was successful, computes a score and runs the DoneCallback.
|
| - // If extraction was unsuccessful, runs the DoneCallback with a
|
| - // non-phishy verdict.
|
| - void TermExtractionFinished(bool success);
|
| -
|
| - // Helper to verify that there is no pending phishing classification. Dies
|
| - // in debug builds if the state is not as expected. This is a no-op in
|
| - // release builds.
|
| - void CheckNoPendingClassification();
|
| -
|
| - // Helper method to run the DoneCallback and clear the state.
|
| - void RunCallback(const ClientPhishingRequest& verdict);
|
| -
|
| - // Helper to run the DoneCallback when feature extraction has failed.
|
| - // This always signals a non-phishy verdict for the page, with kInvalidScore.
|
| - void RunFailureCallback();
|
| -
|
| - // Clears the current state of the PhishingClassifier.
|
| - void Clear();
|
| -
|
| - content::RenderFrame* render_frame_; // owns us
|
| - const Scorer* scorer_; // owned by the caller
|
| - std::unique_ptr<FeatureExtractorClock> clock_;
|
| - std::unique_ptr<PhishingUrlFeatureExtractor> url_extractor_;
|
| - std::unique_ptr<PhishingDOMFeatureExtractor> dom_extractor_;
|
| - std::unique_ptr<PhishingTermFeatureExtractor> term_extractor_;
|
| -
|
| - // State for any in-progress extraction.
|
| - std::unique_ptr<FeatureMap> features_;
|
| - std::unique_ptr<std::set<uint32_t>> shingle_hashes_;
|
| - const base::string16* page_text_; // owned by the caller
|
| - DoneCallback done_callback_;
|
| -
|
| - // Used in scheduling BeginFeatureExtraction tasks.
|
| - // These pointers are invalidated if classification is cancelled.
|
| - base::WeakPtrFactory<PhishingClassifier> weak_factory_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(PhishingClassifier);
|
| -};
|
| -
|
| -} // namespace safe_browsing
|
| -
|
| -#endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_
|
|
|