chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc - Issue 6805019: Move crypto files out of base, to a top level directory.

Side by Side Diff: chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc

Issue 6805019: Move crypto files out of base, to a top level directory. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Fixes comments by eroman. Created 9 years, 8 months ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc ('k') | chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"	5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"

6	6

7 #include <list>	7 #include <list>

8 #include <map>	8 #include <map>

9	9

10 #include "base/compiler_specific.h"	10 #include "base/compiler_specific.h"

11 #include "base/logging.h"	11 #include "base/logging.h"

12 #include "base/message_loop.h"	12 #include "base/message_loop.h"

13 #include "base/sha2.h"

14 #include "base/metrics/histogram.h"	13 #include "base/metrics/histogram.h"

15 #include "base/time.h"	14 #include "base/time.h"

16 #include "base/utf_string_conversions.h"	15 #include "base/utf_string_conversions.h"

	16 #include "crypto/sha2.h"

17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"	17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"

18 #include "chrome/renderer/safe_browsing/features.h"	18 #include "chrome/renderer/safe_browsing/features.h"

19 #include "ui/base/l10n/l10n_util.h"	19 #include "ui/base/l10n/l10n_util.h"

20 #include "unicode/ubrk.h"	20 #include "unicode/ubrk.h"

21	21

22 namespace safe_browsing {	22 namespace safe_browsing {

23	23

24 // This time should be short enough that it doesn't noticeably disrupt the	24 // This time should be short enough that it doesn't noticeably disrupt the

25 // user's interaction with the page.	25 // user's interaction with the page.

26 const int PhishingTermFeatureExtractor::kMaxTimePerChunkMs = 20;	26 const int PhishingTermFeatureExtractor::kMaxTimePerChunkMs = 20;

(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
193 return;	193 return;

194 }	194 }

195 // Otherwise, continue.	195 // Otherwise, continue.

196 }	196 }

197 }	197 }

198 RunCallback(true);	198 RunCallback(true);

199 }	199 }

200	200

201 void PhishingTermFeatureExtractor::HandleWord(const string16& word) {	201 void PhishingTermFeatureExtractor::HandleWord(const string16& word) {

202 std::string word_lower = UTF16ToUTF8(l10n_util::ToLower(word));	202 std::string word_lower = UTF16ToUTF8(l10n_util::ToLower(word));

203 std::string word_hash = base::SHA256HashString(word_lower);	203 std::string word_hash = crypto::SHA256HashString(word_lower);

204	204

205 // Quick out if the word is not part of any term, which is the common case.	205 // Quick out if the word is not part of any term, which is the common case.

206 if (page_word_hashes_->find(word_hash) == page_word_hashes_->end()) {	206 if (page_word_hashes_->find(word_hash) == page_word_hashes_->end()) {

207 // Word doesn't exist in our terms so we can clear the n-gram state.	207 // Word doesn't exist in our terms so we can clear the n-gram state.

208 state_->previous_words.clear();	208 state_->previous_words.clear();

209 state_->previous_word_sizes.clear();	209 state_->previous_word_sizes.clear();

210 return;	210 return;

211 }	211 }

212	212

213 // Find all of the n-grams that we need to check and compute their hashes.	213 // Find all of the n-grams that we need to check and compute their hashes.

(...skipping 13 matching lines...) Expand all Loading...
227 // - We could include positional information about words in the n-grams,	227 // - We could include positional information about words in the n-grams,

228 // rather than just a list of all of the words. For example, we could	228 // rather than just a list of all of the words. For example, we could

229 // change the term format so that each word is hashed separately, or	229 // change the term format so that each word is hashed separately, or

230 // we could add extra data to the word list to indicate the position	230 // we could add extra data to the word list to indicate the position

231 // at which the word appears in an n-gram, and skip checking the word if	231 // at which the word appears in an n-gram, and skip checking the word if

232 // it's not at that position.	232 // it's not at that position.

233 state_->previous_words.append(word_lower);	233 state_->previous_words.append(word_lower);

234 std::string current_term = state_->previous_words;	234 std::string current_term = state_->previous_words;

235 for (std::list<size_t>::iterator it = state_->previous_word_sizes.begin();	235 for (std::list<size_t>::iterator it = state_->previous_word_sizes.begin();

236 it != state_->previous_word_sizes.end(); ++it) {	236 it != state_->previous_word_sizes.end(); ++it) {

237 hashes_to_check[base::SHA256HashString(current_term)] = current_term;	237 hashes_to_check[crypto::SHA256HashString(current_term)] = current_term;

238 current_term.erase(0, *it);	238 current_term.erase(0, *it);

239 }	239 }

240	240

241 // Add features for any hashes that match page_term_hashes_.	241 // Add features for any hashes that match page_term_hashes_.

242 for (std::map<std::string, std::string>::iterator it =	242 for (std::map<std::string, std::string>::iterator it =

243 hashes_to_check.begin();	243 hashes_to_check.begin();

244 it != hashes_to_check.end(); ++it) {	244 it != hashes_to_check.end(); ++it) {

245 if (page_term_hashes_->find(it->first) != page_term_hashes_->end()) {	245 if (page_term_hashes_->find(it->first) != page_term_hashes_->end()) {

246 features_->AddBooleanFeature(features::kPageTerm + it->second);	246 features_->AddBooleanFeature(features::kPageTerm + it->second);

247 }	247 }

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
286 }	286 }

287	287

288 void PhishingTermFeatureExtractor::Clear() {	288 void PhishingTermFeatureExtractor::Clear() {

289 page_text_ = NULL;	289 page_text_ = NULL;

290 features_ = NULL;	290 features_ = NULL;

291 done_callback_.reset(NULL);	291 done_callback_.reset(NULL);

292 state_.reset(NULL);	292 state_.reset(NULL);

293 }	293 }

294	294

295 } // namespace safe_browsing	295 } // namespace safe_browsing

OLD	NEW