| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" |
| 6 | 6 |
| 7 #include <string> | 7 #include <string> |
| 8 | 8 |
| 9 #include "base/bind.h" | 9 #include "base/bind.h" |
| 10 #include "base/callback.h" | 10 #include "base/callback.h" |
| 11 #include "base/hash_tables.h" | 11 #include "base/hash_tables.h" |
| 12 #include "base/memory/scoped_ptr.h" | 12 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/message_loop.h" | 13 #include "base/message_loop.h" |
| 14 #include "base/string16.h" | 14 #include "base/string16.h" |
| 15 #include "base/stringprintf.h" | 15 #include "base/stringprintf.h" |
| 16 #include "base/time.h" | 16 #include "base/time.h" |
| 17 #include "base/utf_string_conversions.h" | 17 #include "base/utf_string_conversions.h" |
| 18 #include "crypto/sha2.h" | 18 #include "crypto/sha2.h" |
| 19 #include "chrome/renderer/safe_browsing/features.h" | 19 #include "chrome/renderer/safe_browsing/features.h" |
| 20 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" | 20 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" |
| 21 #include "chrome/renderer/safe_browsing/murmurhash3_util.h" |
| 21 #include "testing/gmock/include/gmock/gmock.h" | 22 #include "testing/gmock/include/gmock/gmock.h" |
| 22 #include "testing/gtest/include/gtest/gtest.h" | 23 #include "testing/gtest/include/gtest/gtest.h" |
| 23 | 24 |
| 24 using ::testing::ContainerEq; | 25 using ::testing::ContainerEq; |
| 25 using ::testing::Return; | 26 using ::testing::Return; |
| 26 | 27 |
| 27 namespace safe_browsing { | 28 namespace safe_browsing { |
| 28 | 29 |
| 29 class PhishingTermFeatureExtractorTest : public ::testing::Test { | 30 class PhishingTermFeatureExtractorTest : public ::testing::Test { |
| 30 protected: | 31 protected: |
| (...skipping 23 matching lines...) Expand all Loading... |
| 54 words.insert("multi"); | 55 words.insert("multi"); |
| 55 words.insert("word"); | 56 words.insert("word"); |
| 56 words.insert("test"); | 57 words.insert("test"); |
| 57 words.insert("capitalization"); | 58 words.insert("capitalization"); |
| 58 words.insert("space"); | 59 words.insert("space"); |
| 59 words.insert("separator"); | 60 words.insert("separator"); |
| 60 words.insert("punctuation"); | 61 words.insert("punctuation"); |
| 61 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); | 62 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); |
| 62 words.insert("\xe5\x86\x8d\xe8\xa7\x81"); | 63 words.insert("\xe5\x86\x8d\xe8\xa7\x81"); |
| 63 | 64 |
| 65 static const uint32 kMurmurHash3Seed = 2777808611U; |
| 64 for (base::hash_set<std::string>::iterator it = words.begin(); | 66 for (base::hash_set<std::string>::iterator it = words.begin(); |
| 65 it != words.end(); ++it) { | 67 it != words.end(); ++it) { |
| 66 word_hashes_.insert(crypto::SHA256HashString(*it)); | 68 word_hashes_.insert(MurmurHash3String(*it, kMurmurHash3Seed)); |
| 67 } | 69 } |
| 68 | 70 |
| 69 extractor_.reset(new PhishingTermFeatureExtractor( | 71 extractor_.reset(new PhishingTermFeatureExtractor( |
| 70 &term_hashes_, | 72 &term_hashes_, |
| 71 &word_hashes_, | 73 &word_hashes_, |
| 72 3 /* max_words_per_term */, | 74 3 /* max_words_per_term */, |
| 75 kMurmurHash3Seed, |
| 73 &clock_)); | 76 &clock_)); |
| 74 } | 77 } |
| 75 | 78 |
| 76 // Runs the TermFeatureExtractor on |page_text|, waiting for the | 79 // Runs the TermFeatureExtractor on |page_text|, waiting for the |
| 77 // completion callback. Returns the success boolean from the callback. | 80 // completion callback. Returns the success boolean from the callback. |
| 78 bool ExtractFeatures(const string16* page_text, FeatureMap* features) { | 81 bool ExtractFeatures(const string16* page_text, FeatureMap* features) { |
| 79 success_ = false; | 82 success_ = false; |
| 80 extractor_->ExtractFeatures( | 83 extractor_->ExtractFeatures( |
| 81 page_text, | 84 page_text, |
| 82 features, | 85 features, |
| (...skipping 22 matching lines...) Expand all Loading... |
| 105 | 108 |
| 106 void QuitExtraction() { | 109 void QuitExtraction() { |
| 107 extractor_->CancelPendingExtraction(); | 110 extractor_->CancelPendingExtraction(); |
| 108 msg_loop_.Quit(); | 111 msg_loop_.Quit(); |
| 109 } | 112 } |
| 110 | 113 |
| 111 MessageLoop msg_loop_; | 114 MessageLoop msg_loop_; |
| 112 MockFeatureExtractorClock clock_; | 115 MockFeatureExtractorClock clock_; |
| 113 scoped_ptr<PhishingTermFeatureExtractor> extractor_; | 116 scoped_ptr<PhishingTermFeatureExtractor> extractor_; |
| 114 base::hash_set<std::string> term_hashes_; | 117 base::hash_set<std::string> term_hashes_; |
| 115 base::hash_set<std::string> word_hashes_; | 118 base::hash_set<uint32> word_hashes_; |
| 116 bool success_; // holds the success value from ExtractFeatures | 119 bool success_; // holds the success value from ExtractFeatures |
| 117 }; | 120 }; |
| 118 | 121 |
| 119 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { | 122 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { |
| 120 // This test doesn't exercise the extraction timing. | 123 // This test doesn't exercise the extraction timing. |
| 121 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); | 124 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); |
| 122 | 125 |
| 123 string16 page_text = ASCIIToUTF16("blah"); | 126 string16 page_text = ASCIIToUTF16("blah"); |
| 124 FeatureMap expected_features; // initially empty | 127 FeatureMap expected_features; // initially empty |
| 125 | 128 |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 301 // Now extract normally and make sure nothing breaks. | 304 // Now extract normally and make sure nothing breaks. |
| 302 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features)); | 305 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features)); |
| 303 | 306 |
| 304 FeatureMap expected_features; | 307 FeatureMap expected_features; |
| 305 expected_features.AddBooleanFeature(features::kPageTerm + | 308 expected_features.AddBooleanFeature(features::kPageTerm + |
| 306 std::string("multi word test")); | 309 std::string("multi word test")); |
| 307 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); | 310 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 308 } | 311 } |
| 309 | 312 |
| 310 } // namespace safe_browsing | 313 } // namespace safe_browsing |
| OLD | NEW |