| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // This class handles the process of extracting all of the features from a | 5 // This class handles the process of extracting all of the features from a |
| 6 // page and computing a phishyness score. The basic steps are: | 6 // page and computing a phishyness score. The basic steps are: |
| 7 // - Run each feature extractor over the page, building up a FeatureMap of | 7 // - Run each feature extractor over the page, building up a FeatureMap of |
| 8 // feature -> value. | 8 // feature -> value. |
| 9 // - SHA-256 hash all of the feature names in the map so that they match the | 9 // - SHA-256 hash all of the feature names in the map so that they match the |
| 10 // supplied model. | 10 // supplied model. |
| (...skipping 27 matching lines...) Expand all Loading... |
| 38 class Scorer; | 38 class Scorer; |
| 39 | 39 |
| 40 class PhishingClassifier { | 40 class PhishingClassifier { |
| 41 public: | 41 public: |
| 42 // Callback to be run when phishing classification finishes. The verdict | 42 // Callback to be run when phishing classification finishes. The verdict |
| 43 // is a ClientPhishingRequest which contains the verdict computed by the | 43 // is a ClientPhishingRequest which contains the verdict computed by the |
| 44 // classifier as well as the extracted features. If verdict.is_phishing() | 44 // classifier as well as the extracted features. If verdict.is_phishing() |
| 45 // is true, the page is considered phishy by the client-side model, | 45 // is true, the page is considered phishy by the client-side model, |
| 46 // and the browser should ping back to get a final verdict. The | 46 // and the browser should ping back to get a final verdict. The |
| 47 // verdict.client_score() is set to kInvalidScore if classification failed. | 47 // verdict.client_score() is set to kInvalidScore if classification failed. |
| 48 typedef Callback1<const ClientPhishingRequest& /* verdict */>::Type | 48 typedef base::Callback<void(const ClientPhishingRequest& /* verdict */)> |
| 49 DoneCallback; | 49 DoneCallback; |
| 50 | 50 |
| 51 static const float kInvalidScore; | 51 static const float kInvalidScore; |
| 52 | 52 |
| 53 // Creates a new PhishingClassifier object that will operate on | 53 // Creates a new PhishingClassifier object that will operate on |
| 54 // |render_view|. |clock| is used to time feature extractor operations, and | 54 // |render_view|. |clock| is used to time feature extractor operations, and |
| 55 // the PhishingClassifier takes ownership of this object. Note that the | 55 // the PhishingClassifier takes ownership of this object. Note that the |
| 56 // classifier will not be 'ready' until set_phishing_scorer() is called. | 56 // classifier will not be 'ready' until set_phishing_scorer() is called. |
| 57 PhishingClassifier(content::RenderView* render_view, | 57 PhishingClassifier(content::RenderView* render_view, |
| 58 FeatureExtractorClock* clock); | 58 FeatureExtractorClock* clock); |
| (...skipping 18 matching lines...) Expand all Loading... |
| 77 // | 77 // |
| 78 // To avoid blocking the render thread for too long, phishing classification | 78 // To avoid blocking the render thread for too long, phishing classification |
| 79 // may run in several chunks of work, posting a task to the current | 79 // may run in several chunks of work, posting a task to the current |
| 80 // MessageLoop to continue processing. Once the scoring process is complete, | 80 // MessageLoop to continue processing. Once the scoring process is complete, |
| 81 // |done_callback| is run on the current thread. PhishingClassifier takes | 81 // |callback| is run on the current thread. PhishingClassifier stores its |
| 82 // ownership of the callback. | 82 // own copy of the callback. |
| 83 // | 83 // |
| 84 // It is an error to call BeginClassification if the classifier is not yet | 84 // It is an error to call BeginClassification if the classifier is not yet |
| 85 // ready. | 85 // ready. |
| 86 virtual void BeginClassification(const string16* page_text, | 86 virtual void BeginClassification(const string16* page_text, |
| 87 DoneCallback* callback); | 87 const DoneCallback& callback); |
| 88 | 88 |
| 89 // Called by the RenderView (on the render thread) when a page is unloading | 89 // Called by the RenderView (on the render thread) when a page is unloading |
| 90 // or the RenderView is being destroyed. This cancels any extraction that | 90 // or the RenderView is being destroyed. This cancels any extraction that |
| 91 // is in progress. It is an error to call CancelPendingClassification if | 91 // is in progress. It is an error to call CancelPendingClassification if |
| 92 // the classifier is not yet ready. | 92 // the classifier is not yet ready. |
| 93 virtual void CancelPendingClassification(); | 93 virtual void CancelPendingClassification(); |
| 94 | 94 |
| 95 private: | 95 private: |
| 96 // Any score equal to or above this value is considered phishy. | 96 // Any score equal to or above this value is considered phishy. |
| 97 static const float kPhishyThreshold; | 97 static const float kPhishyThreshold; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 129 content::RenderView* render_view_; // owns us | 129 content::RenderView* render_view_; // owns us |
| 130 const Scorer* scorer_; // owned by the caller | 130 const Scorer* scorer_; // owned by the caller |
| 131 scoped_ptr<FeatureExtractorClock> clock_; | 131 scoped_ptr<FeatureExtractorClock> clock_; |
| 132 scoped_ptr<PhishingUrlFeatureExtractor> url_extractor_; | 132 scoped_ptr<PhishingUrlFeatureExtractor> url_extractor_; |
| 133 scoped_ptr<PhishingDOMFeatureExtractor> dom_extractor_; | 133 scoped_ptr<PhishingDOMFeatureExtractor> dom_extractor_; |
| 134 scoped_ptr<PhishingTermFeatureExtractor> term_extractor_; | 134 scoped_ptr<PhishingTermFeatureExtractor> term_extractor_; |
| 135 | 135 |
| 136 // State for any in-progress extraction. | 136 // State for any in-progress extraction. |
| 137 scoped_ptr<FeatureMap> features_; | 137 scoped_ptr<FeatureMap> features_; |
| 138 const string16* page_text_; // owned by the caller | 138 const string16* page_text_; // owned by the caller |
| 139 scoped_ptr<DoneCallback> done_callback_; | 139 DoneCallback done_callback_; |
| 140 | 140 |
| 141 // Used in scheduling BeginFeatureExtraction tasks. | 141 // Used in scheduling BeginFeatureExtraction tasks. |
| 142 // Its weak pointers are invalidated if classification is cancelled. | 142 // Its weak pointers are invalidated if classification is cancelled. |
| 143 base::WeakPtrFactory<PhishingClassifier> weak_factory_; | 143 base::WeakPtrFactory<PhishingClassifier> weak_factory_; |
| 144 | 144 |
| 145 DISALLOW_COPY_AND_ASSIGN(PhishingClassifier); | 145 DISALLOW_COPY_AND_ASSIGN(PhishingClassifier); |
| 146 }; | 146 }; |
| 147 | 147 |
| 148 } // namespace safe_browsing | 148 } // namespace safe_browsing |
| 149 | 149 |
| 150 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_ | 150 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_ |
| OLD | NEW |