Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(27)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc

Issue 6805019: Move crypto files out of base, to a top level directory. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Fixes comments by eroman Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
6 6
7 #include <list> 7 #include <list>
8 #include <map> 8 #include <map>
9 9
10 #include "base/compiler_specific.h" 10 #include "base/compiler_specific.h"
11 #include "base/logging.h" 11 #include "base/logging.h"
12 #include "base/message_loop.h" 12 #include "base/message_loop.h"
13 #include "base/sha2.h"
14 #include "base/metrics/histogram.h" 13 #include "base/metrics/histogram.h"
15 #include "base/time.h" 14 #include "base/time.h"
16 #include "base/utf_string_conversions.h" 15 #include "base/utf_string_conversions.h"
16 #include "crypto/sha2.h"
17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" 17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
18 #include "chrome/renderer/safe_browsing/features.h" 18 #include "chrome/renderer/safe_browsing/features.h"
19 #include "ui/base/l10n/l10n_util.h" 19 #include "ui/base/l10n/l10n_util.h"
20 #include "unicode/ubrk.h" 20 #include "unicode/ubrk.h"
21 21
22 namespace safe_browsing { 22 namespace safe_browsing {
23 23
24 // This time should be short enough that it doesn't noticeably disrupt the 24 // This time should be short enough that it doesn't noticeably disrupt the
25 // user's interaction with the page. 25 // user's interaction with the page.
26 const int PhishingTermFeatureExtractor::kMaxTimePerChunkMs = 20; 26 const int PhishingTermFeatureExtractor::kMaxTimePerChunkMs = 20;
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
193 return; 193 return;
194 } 194 }
195 // Otherwise, continue. 195 // Otherwise, continue.
196 } 196 }
197 } 197 }
198 RunCallback(true); 198 RunCallback(true);
199 } 199 }
200 200
201 void PhishingTermFeatureExtractor::HandleWord(const string16& word) { 201 void PhishingTermFeatureExtractor::HandleWord(const string16& word) {
202 std::string word_lower = UTF16ToUTF8(l10n_util::ToLower(word)); 202 std::string word_lower = UTF16ToUTF8(l10n_util::ToLower(word));
203 std::string word_hash = base::SHA256HashString(word_lower); 203 std::string word_hash = crypto::SHA256HashString(word_lower);
204 204
205 // Quick out if the word is not part of any term, which is the common case. 205 // Quick out if the word is not part of any term, which is the common case.
206 if (page_word_hashes_->find(word_hash) == page_word_hashes_->end()) { 206 if (page_word_hashes_->find(word_hash) == page_word_hashes_->end()) {
207 // Word doesn't exist in our terms so we can clear the n-gram state. 207 // Word doesn't exist in our terms so we can clear the n-gram state.
208 state_->previous_words.clear(); 208 state_->previous_words.clear();
209 state_->previous_word_sizes.clear(); 209 state_->previous_word_sizes.clear();
210 return; 210 return;
211 } 211 }
212 212
213 // Find all of the n-grams that we need to check and compute their hashes. 213 // Find all of the n-grams that we need to check and compute their hashes.
(...skipping 13 matching lines...) Expand all
227 // - We could include positional information about words in the n-grams, 227 // - We could include positional information about words in the n-grams,
228 // rather than just a list of all of the words. For example, we could 228 // rather than just a list of all of the words. For example, we could
229 // change the term format so that each word is hashed separately, or 229 // change the term format so that each word is hashed separately, or
230 // we could add extra data to the word list to indicate the position 230 // we could add extra data to the word list to indicate the position
231 // at which the word appears in an n-gram, and skip checking the word if 231 // at which the word appears in an n-gram, and skip checking the word if
232 // it's not at that position. 232 // it's not at that position.
233 state_->previous_words.append(word_lower); 233 state_->previous_words.append(word_lower);
234 std::string current_term = state_->previous_words; 234 std::string current_term = state_->previous_words;
235 for (std::list<size_t>::iterator it = state_->previous_word_sizes.begin(); 235 for (std::list<size_t>::iterator it = state_->previous_word_sizes.begin();
236 it != state_->previous_word_sizes.end(); ++it) { 236 it != state_->previous_word_sizes.end(); ++it) {
237 hashes_to_check[base::SHA256HashString(current_term)] = current_term; 237 hashes_to_check[crypto::SHA256HashString(current_term)] = current_term;
238 current_term.erase(0, *it); 238 current_term.erase(0, *it);
239 } 239 }
240 240
241 // Add features for any hashes that match page_term_hashes_. 241 // Add features for any hashes that match page_term_hashes_.
242 for (std::map<std::string, std::string>::iterator it = 242 for (std::map<std::string, std::string>::iterator it =
243 hashes_to_check.begin(); 243 hashes_to_check.begin();
244 it != hashes_to_check.end(); ++it) { 244 it != hashes_to_check.end(); ++it) {
245 if (page_term_hashes_->find(it->first) != page_term_hashes_->end()) { 245 if (page_term_hashes_->find(it->first) != page_term_hashes_->end()) {
246 features_->AddBooleanFeature(features::kPageTerm + it->second); 246 features_->AddBooleanFeature(features::kPageTerm + it->second);
247 } 247 }
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
286 } 286 }
287 287
288 void PhishingTermFeatureExtractor::Clear() { 288 void PhishingTermFeatureExtractor::Clear() {
289 page_text_ = NULL; 289 page_text_ = NULL;
290 features_ = NULL; 290 features_ = NULL;
291 done_callback_.reset(NULL); 291 done_callback_.reset(NULL);
292 state_.reset(NULL); 292 state_.reset(NULL);
293 } 293 }
294 294
295 } // namespace safe_browsing 295 } // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698