OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
11 #include "base/hash_tables.h" | 11 #include "base/hash_tables.h" |
12 #include "base/memory/scoped_ptr.h" | 12 #include "base/memory/scoped_ptr.h" |
13 #include "base/message_loop.h" | 13 #include "base/message_loop.h" |
14 #include "base/string16.h" | 14 #include "base/string16.h" |
15 #include "base/stringprintf.h" | 15 #include "base/stringprintf.h" |
16 #include "base/time.h" | 16 #include "base/time.h" |
17 #include "base/utf_string_conversions.h" | 17 #include "base/utf_string_conversions.h" |
18 #include "crypto/sha2.h" | 18 #include "crypto/sha2.h" |
19 #include "chrome/renderer/safe_browsing/features.h" | 19 #include "chrome/renderer/safe_browsing/features.h" |
20 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" | 20 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" |
| 21 #include "chrome/renderer/safe_browsing/murmurhash3_util.h" |
21 #include "testing/gmock/include/gmock/gmock.h" | 22 #include "testing/gmock/include/gmock/gmock.h" |
22 #include "testing/gtest/include/gtest/gtest.h" | 23 #include "testing/gtest/include/gtest/gtest.h" |
23 | 24 |
24 using ::testing::ContainerEq; | 25 using ::testing::ContainerEq; |
25 using ::testing::Return; | 26 using ::testing::Return; |
26 | 27 |
27 namespace safe_browsing { | 28 namespace safe_browsing { |
28 | 29 |
29 class PhishingTermFeatureExtractorTest : public ::testing::Test { | 30 class PhishingTermFeatureExtractorTest : public ::testing::Test { |
30 protected: | 31 protected: |
(...skipping 23 matching lines...) |
54 words.insert("multi"); | 55 words.insert("multi"); |
55 words.insert("word"); | 56 words.insert("word"); |
56 words.insert("test"); | 57 words.insert("test"); |
57 words.insert("capitalization"); | 58 words.insert("capitalization"); |
58 words.insert("space"); | 59 words.insert("space"); |
59 words.insert("separator"); | 60 words.insert("separator"); |
60 words.insert("punctuation"); | 61 words.insert("punctuation"); |
61 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); | 62 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); |
62 words.insert("\xe5\x86\x8d\xe8\xa7\x81"); | 63 words.insert("\xe5\x86\x8d\xe8\xa7\x81"); |
63 | 64 |
| 65 static const uint32 kMurmurHash3Seed = 2777808611U; |
64 for (base::hash_set<std::string>::iterator it = words.begin(); | 66 for (base::hash_set<std::string>::iterator it = words.begin(); |
65 it != words.end(); ++it) { | 67 it != words.end(); ++it) { |
66 word_hashes_.insert(crypto::SHA256HashString(*it)); | 68 word_hashes_.insert(MurmurHash3String(*it, kMurmurHash3Seed)); |
67 } | 69 } |
68 | 70 |
69 extractor_.reset(new PhishingTermFeatureExtractor( | 71 extractor_.reset(new PhishingTermFeatureExtractor( |
70 &term_hashes_, | 72 &term_hashes_, |
71 &word_hashes_, | 73 &word_hashes_, |
72 3 /* max_words_per_term */, | 74 3 /* max_words_per_term */, |
| 75 kMurmurHash3Seed, |
73 &clock_)); | 76 &clock_)); |
74 } | 77 } |
75 | 78 |
76 // Runs the TermFeatureExtractor on |page_text|, waiting for the | 79 // Runs the TermFeatureExtractor on |page_text|, waiting for the |
77 // completion callback. Returns the success boolean from the callback. | 80 // completion callback. Returns the success boolean from the callback. |
78 bool ExtractFeatures(const string16* page_text, FeatureMap* features) { | 81 bool ExtractFeatures(const string16* page_text, FeatureMap* features) { |
79 success_ = false; | 82 success_ = false; |
80 extractor_->ExtractFeatures( | 83 extractor_->ExtractFeatures( |
81 page_text, | 84 page_text, |
82 features, | 85 features, |
(...skipping 22 matching lines...) |
105 | 108 |
106 void QuitExtraction() { | 109 void QuitExtraction() { |
107 extractor_->CancelPendingExtraction(); | 110 extractor_->CancelPendingExtraction(); |
108 msg_loop_.Quit(); | 111 msg_loop_.Quit(); |
109 } | 112 } |
110 | 113 |
111 MessageLoop msg_loop_; | 114 MessageLoop msg_loop_; |
112 MockFeatureExtractorClock clock_; | 115 MockFeatureExtractorClock clock_; |
113 scoped_ptr<PhishingTermFeatureExtractor> extractor_; | 116 scoped_ptr<PhishingTermFeatureExtractor> extractor_; |
114 base::hash_set<std::string> term_hashes_; | 117 base::hash_set<std::string> term_hashes_; |
115 base::hash_set<std::string> word_hashes_; | 118 base::hash_set<uint32> word_hashes_; |
116 bool success_; // holds the success value from ExtractFeatures | 119 bool success_; // holds the success value from ExtractFeatures |
117 }; | 120 }; |
118 | 121 |
119 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { | 122 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { |
120 // This test doesn't exercise the extraction timing. | 123 // This test doesn't exercise the extraction timing. |
121 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); | 124 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); |
122 | 125 |
123 string16 page_text = ASCIIToUTF16("blah"); | 126 string16 page_text = ASCIIToUTF16("blah"); |
124 FeatureMap expected_features; // initially empty | 127 FeatureMap expected_features; // initially empty |
125 | 128 |
(...skipping 175 matching lines...) |
301 // Now extract normally and make sure nothing breaks. | 304 // Now extract normally and make sure nothing breaks. |
302 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features)); | 305 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features)); |
303 | 306 |
304 FeatureMap expected_features; | 307 FeatureMap expected_features; |
305 expected_features.AddBooleanFeature(features::kPageTerm + | 308 expected_features.AddBooleanFeature(features::kPageTerm + |
306 std::string("multi word test")); | 309 std::string("multi word test")); |
307 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); | 310 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
308 } | 311 } |
309 | 312 |
310 } // namespace safe_browsing | 313 } // namespace safe_browsing |
OLD | NEW |