Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2397)

Unified Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
diff --git a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
deleted file mode 100644
index 761c725517d8e630e9655e802ea4ce91fdb41c2a..0000000000000000000000000000000000000000
--- a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// PhishingDOMFeatureExtractor handles computing DOM-based features for the
-// client-side phishing detection model. These include the presence of various
-// types of elements, ratios of external and secure links, and tokens for
-// external domains linked to.
-
-#ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
-#define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
-
-#include <memory>
-#include <string>
-
-#include "base/callback.h"
-#include "base/macros.h"
-#include "base/memory/weak_ptr.h"
-#include "third_party/WebKit/public/web/WebDocument.h"
-
-class GURL;
-
-namespace blink {
-class WebElement;
-}
-
-namespace safe_browsing {
-class FeatureExtractorClock;
-class FeatureMap;
-
-class PhishingDOMFeatureExtractor {
- public:
- // Callback to be run when feature extraction finishes. The callback
- // argument is true if extraction was successful, false otherwise.
- typedef base::Callback<void(bool)> DoneCallback;
-
- // Creates a PhishingDOMFeatureExtractor instance.
- // |clock| is used for timing feature extractor operations, and may be
- // mocked for testing. The caller maintains ownership of the clock.
- explicit PhishingDOMFeatureExtractor(FeatureExtractorClock* clock);
- ~PhishingDOMFeatureExtractor();
-
- // Begins extracting features into the given FeatureMap for the page.
- // To avoid blocking the render thread for too long, the feature extractor
- // may run in several chunks of work, posting a task to the current
- // MessageLoop to continue processing. Once feature extraction is complete,
- // |done_callback| is run on the current thread. PhishingDOMFeatureExtractor
- // takes ownership of the callback.
- void ExtractFeatures(blink::WebDocument document,
- FeatureMap* features,
- const DoneCallback& done_callback);
-
- // Cancels any pending feature extraction. The DoneCallback will not be run.
- // Must be called if there is a feature extraction in progress when the page
- // is unloaded or the PhishingDOMFeatureExtractor is destroyed.
- void CancelPendingExtraction();
-
- private:
- struct FrameData;
- struct PageFeatureState;
-
- // The maximum amount of wall time that we will spend on a single extraction
- // iteration before pausing to let other MessageLoop tasks run.
- static const int kMaxTimePerChunkMs;
-
- // The number of elements that we will process before checking to see whether
- // kMaxTimePerChunkMs has elapsed. Since checking the current time can be
- // slow, we don't do this on every element processed.
- static const int kClockCheckGranularity;
-
- // The maximum total amount of time that the feature extractor will run
- // before giving up on the current page.
- static const int kMaxTotalTimeMs;
-
- // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs
- // until a predefined maximum amount of time has elapsed, then posts a task
- // to the current MessageLoop to continue extraction. When extraction
- // finishes, calls RunCallback().
- void ExtractFeaturesWithTimeout();
-
- // Handlers for the various HTML elements that we compute features for.
- // Since some of the features (such as ratios) cannot be computed until
- // feature extraction is finished, these handlers do not add to the feature
- // map directly. Instead, they update the values in the PageFeatureState.
- void HandleLink(const blink::WebElement& element);
- void HandleForm(const blink::WebElement& element);
- void HandleImage(const blink::WebElement& element);
- void HandleInput(const blink::WebElement& element);
- void HandleScript(const blink::WebElement& element);
-
- // Helper to verify that there is no pending feature extraction. Dies in
- // debug builds if the state is not as expected. This is a no-op in release
- // builds.
- void CheckNoPendingExtraction();
-
- // Runs |done_callback_| and then clears all internal state.
- void RunCallback(bool success);
-
- // Clears all internal feature extraction state.
- void Clear();
-
- // Called after advancing |cur_document_| to update the state in
- // |cur_frame_data_|.
- void ResetFrameData();
-
- // Returns the next document in frame-traversal order from cur_document_.
- // If there are no more documents, returns a null WebDocument.
- blink::WebDocument GetNextDocument();
-
- // Given a URL, checks whether the domain is different from the domain of
- // the current frame's URL. If so, stores the domain in |domain| and returns
- // true, otherwise returns false.
- virtual bool IsExternalDomain(const GURL& url, std::string* domain) const;
-
- // Given a partial URL, extend it to a full url based on the current frame's
- // URL.
- virtual blink::WebURL CompleteURL(const blink::WebElement& element,
- const blink::WebString& partial_url);
-
- // Called once all frames have been processed to compute features from the
- // PageFeatureState and add them to |features_|. See features.h for a
- // description of which features are computed.
- void InsertFeatures();
-
-
- // Non-owned pointer to our clock.
- FeatureExtractorClock* clock_;
-
- // The output parameters from the most recent call to ExtractFeatures().
- FeatureMap* features_; // The caller keeps ownership of this.
- DoneCallback done_callback_;
-
- // The current (sub-)document that we are processing. May be a null document
- // (isNull()) if we are not currently extracting features.
- blink::WebDocument cur_document_;
-
- // Stores extra state for |cur_document_| that will be persisted until we
- // advance to the next frame.
- std::unique_ptr<FrameData> cur_frame_data_;
-
- // Stores the intermediate data used to create features. This data is
- // accumulated across all frames in the RenderView.
- std::unique_ptr<PageFeatureState> page_feature_state_;
-
- // Used in scheduling ExtractFeaturesWithTimeout tasks.
- // These pointers are invalidated if extraction is cancelled.
- base::WeakPtrFactory<PhishingDOMFeatureExtractor> weak_factory_;
-
- DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor);
-};
-
-} // namespace safe_browsing
-
-#endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_

Powered by Google App Engine
This is Rietveld 408576698