Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc

Issue 1548153002: Switch to standard integer types in chrome/. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
6 6
7 #include <list> 7 #include <list>
8 #include <map> 8 #include <map>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 if (i->Init()) { 77 if (i->Init()) {
78 iterator = i.Pass(); 78 iterator = i.Pass();
79 } else { 79 } else {
80 DLOG(ERROR) << "failed to open iterator"; 80 DLOG(ERROR) << "failed to open iterator";
81 } 81 }
82 } 82 }
83 }; 83 };
84 84
85 PhishingTermFeatureExtractor::PhishingTermFeatureExtractor( 85 PhishingTermFeatureExtractor::PhishingTermFeatureExtractor(
86 const base::hash_set<std::string>* page_term_hashes, 86 const base::hash_set<std::string>* page_term_hashes,
87 const base::hash_set<uint32>* page_word_hashes, 87 const base::hash_set<uint32_t>* page_word_hashes,
88 size_t max_words_per_term, 88 size_t max_words_per_term,
89 uint32 murmurhash3_seed, 89 uint32_t murmurhash3_seed,
90 size_t max_shingles_per_page, 90 size_t max_shingles_per_page,
91 size_t shingle_size, 91 size_t shingle_size,
92 FeatureExtractorClock* clock) 92 FeatureExtractorClock* clock)
93 : page_term_hashes_(page_term_hashes), 93 : page_term_hashes_(page_term_hashes),
94 page_word_hashes_(page_word_hashes), 94 page_word_hashes_(page_word_hashes),
95 max_words_per_term_(max_words_per_term), 95 max_words_per_term_(max_words_per_term),
96 murmurhash3_seed_(murmurhash3_seed), 96 murmurhash3_seed_(murmurhash3_seed),
97 max_shingles_per_page_(max_shingles_per_page), 97 max_shingles_per_page_(max_shingles_per_page),
98 shingle_size_(shingle_size), 98 shingle_size_(shingle_size),
99 clock_(clock), 99 clock_(clock),
100 weak_factory_(this) { 100 weak_factory_(this) {
101 Clear(); 101 Clear();
102 } 102 }
103 103
104 PhishingTermFeatureExtractor::~PhishingTermFeatureExtractor() { 104 PhishingTermFeatureExtractor::~PhishingTermFeatureExtractor() {
105 // The RenderView should have called CancelPendingExtraction() before 105 // The RenderView should have called CancelPendingExtraction() before
106 // we are destroyed. 106 // we are destroyed.
107 CheckNoPendingExtraction(); 107 CheckNoPendingExtraction();
108 } 108 }
109 109
110 void PhishingTermFeatureExtractor::ExtractFeatures( 110 void PhishingTermFeatureExtractor::ExtractFeatures(
111 const base::string16* page_text, 111 const base::string16* page_text,
112 FeatureMap* features, 112 FeatureMap* features,
113 std::set<uint32>* shingle_hashes, 113 std::set<uint32_t>* shingle_hashes,
114 const DoneCallback& done_callback) { 114 const DoneCallback& done_callback) {
115 // The RenderView should have called CancelPendingExtraction() before 115 // The RenderView should have called CancelPendingExtraction() before
116 // starting a new extraction, so DCHECK this. 116 // starting a new extraction, so DCHECK this.
117 CheckNoPendingExtraction(); 117 CheckNoPendingExtraction();
118 // However, in an opt build, we will go ahead and clean up the pending 118 // However, in an opt build, we will go ahead and clean up the pending
119 // extraction so that we can start in a known state. 119 // extraction so that we can start in a known state.
120 CancelPendingExtraction(); 120 CancelPendingExtraction();
121 121
122 page_text_ = page_text; 122 page_text_ = page_text;
123 features_ = features; 123 features_ = features;
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
201 state_->shingle_word_sizes.push_back(word_lower.size() + 1); 201 state_->shingle_word_sizes.push_back(word_lower.size() + 1);
202 if (state_->shingle_word_sizes.size() == shingle_size_) { 202 if (state_->shingle_word_sizes.size() == shingle_size_) {
203 shingle_hashes_->insert( 203 shingle_hashes_->insert(
204 MurmurHash3String(state_->current_shingle, murmurhash3_seed_)); 204 MurmurHash3String(state_->current_shingle, murmurhash3_seed_));
205 state_->current_shingle.erase(0, state_->shingle_word_sizes.front()); 205 state_->current_shingle.erase(0, state_->shingle_word_sizes.front());
206 state_->shingle_word_sizes.pop_front(); 206 state_->shingle_word_sizes.pop_front();
207 } 207 }
208 // Check if the size of shingle hashes is over the limit. 208 // Check if the size of shingle hashes is over the limit.
209 if (shingle_hashes_->size() > max_shingles_per_page_) { 209 if (shingle_hashes_->size() > max_shingles_per_page_) {
210 // Pop the largest one. 210 // Pop the largest one.
211 std::set<uint32>::iterator it = shingle_hashes_->end(); 211 std::set<uint32_t>::iterator it = shingle_hashes_->end();
212 shingle_hashes_->erase(--it); 212 shingle_hashes_->erase(--it);
213 } 213 }
214 214
215 // Next, extract page terms. 215 // Next, extract page terms.
216 uint32 word_hash = MurmurHash3String(word_lower, murmurhash3_seed_); 216 uint32_t word_hash = MurmurHash3String(word_lower, murmurhash3_seed_);
217 217
218 // Quick out if the word is not part of any term, which is the common case. 218 // Quick out if the word is not part of any term, which is the common case.
219 if (page_word_hashes_->find(word_hash) == page_word_hashes_->end()) { 219 if (page_word_hashes_->find(word_hash) == page_word_hashes_->end()) {
220 // Word doesn't exist in our terms so we can clear the n-gram state. 220 // Word doesn't exist in our terms so we can clear the n-gram state.
221 state_->previous_words.clear(); 221 state_->previous_words.clear();
222 state_->previous_word_sizes.clear(); 222 state_->previous_word_sizes.clear();
223 return; 223 return;
224 } 224 }
225 225
226 // Find all of the n-grams that we need to check and compute their SHA-256 226 // Find all of the n-grams that we need to check and compute their SHA-256
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
290 290
291 void PhishingTermFeatureExtractor::Clear() { 291 void PhishingTermFeatureExtractor::Clear() {
292 page_text_ = NULL; 292 page_text_ = NULL;
293 features_ = NULL; 293 features_ = NULL;
294 shingle_hashes_ = NULL; 294 shingle_hashes_ = NULL;
295 done_callback_.Reset(); 295 done_callback_.Reset();
296 state_.reset(NULL); 296 state_.reset(NULL);
297 } 297 }
298 298
299 } // namespace safe_browsing 299 } // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698