Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1443)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier.h

Issue 268673007: Extracting page shingle hashes for similarity detection. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix a nit Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // This class handles the process of extracting all of the features from a 5 // This class handles the process of extracting all of the features from a
6 // page and computing a phishyness score. The basic steps are: 6 // page and computing a phishyness score. The basic steps are:
7 // - Run each feature extractor over the page, building up a FeatureMap of 7 // - Run each feature extractor over the page, building up a FeatureMap of
8 // feature -> value. 8 // feature -> value.
9 // - SHA-256 hash all of the feature names in the map so that they match the 9 // - SHA-256 hash all of the feature names in the map so that they match the
10 // supplied model. 10 // supplied model.
11 // - Hand the hashed map off to a Scorer, which computes the probability that 11 // - Hand the hashed map off to a Scorer, which computes the probability that
12 // the page is phishy. 12 // the page is phishy.
13 // - If the page is phishy, run the supplied callback. 13 // - If the page is phishy, run the supplied callback.
14 // 14 //
15 // For more details, see phishing_*_feature_extractor.h, scorer.h, and 15 // For more details, see phishing_*_feature_extractor.h, scorer.h, and
16 // client_model.proto. 16 // client_model.proto.
17 17
18 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_ 18 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_
19 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_ 19 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_
20 20
21 #include <set>
22
21 #include "base/basictypes.h" 23 #include "base/basictypes.h"
22 #include "base/callback.h" 24 #include "base/callback.h"
23 #include "base/memory/scoped_ptr.h" 25 #include "base/memory/scoped_ptr.h"
24 #include "base/memory/weak_ptr.h" 26 #include "base/memory/weak_ptr.h"
25 #include "base/strings/string16.h" 27 #include "base/strings/string16.h"
26 28
27 namespace content { 29 namespace content {
28 class RenderView; 30 class RenderView;
29 } 31 }
30 32
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 130
129 content::RenderView* render_view_; // owns us 131 content::RenderView* render_view_; // owns us
130 const Scorer* scorer_; // owned by the caller 132 const Scorer* scorer_; // owned by the caller
131 scoped_ptr<FeatureExtractorClock> clock_; 133 scoped_ptr<FeatureExtractorClock> clock_;
132 scoped_ptr<PhishingUrlFeatureExtractor> url_extractor_; 134 scoped_ptr<PhishingUrlFeatureExtractor> url_extractor_;
133 scoped_ptr<PhishingDOMFeatureExtractor> dom_extractor_; 135 scoped_ptr<PhishingDOMFeatureExtractor> dom_extractor_;
134 scoped_ptr<PhishingTermFeatureExtractor> term_extractor_; 136 scoped_ptr<PhishingTermFeatureExtractor> term_extractor_;
135 137
136 // State for any in-progress extraction. 138 // State for any in-progress extraction.
137 scoped_ptr<FeatureMap> features_; 139 scoped_ptr<FeatureMap> features_;
140 scoped_ptr<std::set<uint32> > shingle_hashes_;
138 const base::string16* page_text_; // owned by the caller 141 const base::string16* page_text_; // owned by the caller
139 DoneCallback done_callback_; 142 DoneCallback done_callback_;
140 143
141 // Used in scheduling BeginFeatureExtraction tasks. 144 // Used in scheduling BeginFeatureExtraction tasks.
142 // These pointers are invalidated if classification is cancelled. 145 // These pointers are invalidated if classification is cancelled.
143 base::WeakPtrFactory<PhishingClassifier> weak_factory_; 146 base::WeakPtrFactory<PhishingClassifier> weak_factory_;
144 147
145 DISALLOW_COPY_AND_ASSIGN(PhishingClassifier); 148 DISALLOW_COPY_AND_ASSIGN(PhishingClassifier);
146 }; 149 };
147 150
148 } // namespace safe_browsing 151 } // namespace safe_browsing
149 152
150 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_ 153 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698