Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(615)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc

Issue 7866011: Switch to the new client-side phishing model that uses Murmurhash for word hashes. (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix compile problems and add another test (Created 9 years, 3 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
6 6
7 #include <string> 7 #include <string>
8 8
9 #include "base/bind.h" 9 #include "base/bind.h"
10 #include "base/callback.h" 10 #include "base/callback.h"
11 #include "base/hash_tables.h" 11 #include "base/hash_tables.h"
12 #include "base/memory/scoped_ptr.h" 12 #include "base/memory/scoped_ptr.h"
13 #include "base/message_loop.h" 13 #include "base/message_loop.h"
14 #include "base/string16.h" 14 #include "base/string16.h"
15 #include "base/stringprintf.h" 15 #include "base/stringprintf.h"
16 #include "base/time.h" 16 #include "base/time.h"
17 #include "base/utf_string_conversions.h" 17 #include "base/utf_string_conversions.h"
18 #include "crypto/sha2.h" 18 #include "crypto/sha2.h"
19 #include "chrome/renderer/safe_browsing/features.h" 19 #include "chrome/renderer/safe_browsing/features.h"
20 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 20 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
21 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
21 #include "testing/gmock/include/gmock/gmock.h" 22 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h" 23 #include "testing/gtest/include/gtest/gtest.h"
23 24
24 using ::testing::ContainerEq; 25 using ::testing::ContainerEq;
25 using ::testing::Return; 26 using ::testing::Return;
26 27
27 namespace safe_browsing { 28 namespace safe_browsing {
28 29
29 class PhishingTermFeatureExtractorTest : public ::testing::Test { 30 class PhishingTermFeatureExtractorTest : public ::testing::Test {
30 protected: 31 protected:
(...skipping 23 matching lines...) Expand all
54 words.insert("multi"); 55 words.insert("multi");
55 words.insert("word"); 56 words.insert("word");
56 words.insert("test"); 57 words.insert("test");
57 words.insert("capitalization"); 58 words.insert("capitalization");
58 words.insert("space"); 59 words.insert("space");
59 words.insert("separator"); 60 words.insert("separator");
60 words.insert("punctuation"); 61 words.insert("punctuation");
61 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); 62 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd");
62 words.insert("\xe5\x86\x8d\xe8\xa7\x81"); 63 words.insert("\xe5\x86\x8d\xe8\xa7\x81");
63 64
65 static const uint32 kMurmurHash3Seed = 2777808611U;
64 for (base::hash_set<std::string>::iterator it = words.begin(); 66 for (base::hash_set<std::string>::iterator it = words.begin();
65 it != words.end(); ++it) { 67 it != words.end(); ++it) {
66 word_hashes_.insert(crypto::SHA256HashString(*it)); 68 word_hashes_.insert(MurmurHash3String(*it, kMurmurHash3Seed));
67 } 69 }
68 70
69 extractor_.reset(new PhishingTermFeatureExtractor( 71 extractor_.reset(new PhishingTermFeatureExtractor(
70 &term_hashes_, 72 &term_hashes_,
71 &word_hashes_, 73 &word_hashes_,
72 3 /* max_words_per_term */, 74 3 /* max_words_per_term */,
75 kMurmurHash3Seed,
73 &clock_)); 76 &clock_));
74 } 77 }
75 78
76 // Runs the TermFeatureExtractor on |page_text|, waiting for the 79 // Runs the TermFeatureExtractor on |page_text|, waiting for the
77 // completion callback. Returns the success boolean from the callback. 80 // completion callback. Returns the success boolean from the callback.
78 bool ExtractFeatures(const string16* page_text, FeatureMap* features) { 81 bool ExtractFeatures(const string16* page_text, FeatureMap* features) {
79 success_ = false; 82 success_ = false;
80 extractor_->ExtractFeatures( 83 extractor_->ExtractFeatures(
81 page_text, 84 page_text,
82 features, 85 features,
(...skipping 22 matching lines...) Expand all
105 108
106 void QuitExtraction() { 109 void QuitExtraction() {
107 extractor_->CancelPendingExtraction(); 110 extractor_->CancelPendingExtraction();
108 msg_loop_.Quit(); 111 msg_loop_.Quit();
109 } 112 }
110 113
111 MessageLoop msg_loop_; 114 MessageLoop msg_loop_;
112 MockFeatureExtractorClock clock_; 115 MockFeatureExtractorClock clock_;
113 scoped_ptr<PhishingTermFeatureExtractor> extractor_; 116 scoped_ptr<PhishingTermFeatureExtractor> extractor_;
114 base::hash_set<std::string> term_hashes_; 117 base::hash_set<std::string> term_hashes_;
115 base::hash_set<std::string> word_hashes_; 118 base::hash_set<uint32> word_hashes_;
116 bool success_; // holds the success value from ExtractFeatures 119 bool success_; // holds the success value from ExtractFeatures
117 }; 120 };
118 121
119 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { 122 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) {
120 // This test doesn't exercise the extraction timing. 123 // This test doesn't exercise the extraction timing.
121 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 124 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
122 125
123 string16 page_text = ASCIIToUTF16("blah"); 126 string16 page_text = ASCIIToUTF16("blah");
124 FeatureMap expected_features; // initially empty 127 FeatureMap expected_features; // initially empty
125 128
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 // Now extract normally and make sure nothing breaks. 304 // Now extract normally and make sure nothing breaks.
302 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features)); 305 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features));
303 306
304 FeatureMap expected_features; 307 FeatureMap expected_features;
305 expected_features.AddBooleanFeature(features::kPageTerm + 308 expected_features.AddBooleanFeature(features::kPageTerm +
306 std::string("multi word test")); 309 std::string("multi word test"));
307 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); 310 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
308 } 311 }
309 312
310 } // namespace safe_browsing 313 } // namespace safe_browsing
OLD | NEW
« no previous file with comments | « chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc ('k') | chrome/renderer/safe_browsing/scorer.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698