Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc

Issue 1548153002: Switch to standard integer types in chrome/. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
6 6
7 #include <stddef.h>
8 #include <stdint.h>
9
7 #include <string> 10 #include <string>
8 11
9 #include "base/bind.h" 12 #include "base/bind.h"
10 #include "base/callback.h" 13 #include "base/callback.h"
11 #include "base/containers/hash_tables.h" 14 #include "base/containers/hash_tables.h"
12 #include "base/location.h" 15 #include "base/location.h"
13 #include "base/memory/scoped_ptr.h" 16 #include "base/memory/scoped_ptr.h"
14 #include "base/message_loop/message_loop.h" 17 #include "base/message_loop/message_loop.h"
15 #include "base/single_thread_task_runner.h" 18 #include "base/single_thread_task_runner.h"
16 #include "base/strings/string16.h" 19 #include "base/strings/string16.h"
17 #include "base/strings/stringprintf.h" 20 #include "base/strings/stringprintf.h"
18 #include "base/strings/utf_string_conversions.h" 21 #include "base/strings/utf_string_conversions.h"
19 #include "base/time/time.h" 22 #include "base/time/time.h"
23 #include "build/build_config.h"
20 #include "chrome/renderer/safe_browsing/features.h" 24 #include "chrome/renderer/safe_browsing/features.h"
21 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 25 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
22 #include "chrome/renderer/safe_browsing/murmurhash3_util.h" 26 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
23 #include "chrome/renderer/safe_browsing/test_utils.h" 27 #include "chrome/renderer/safe_browsing/test_utils.h"
24 #include "crypto/sha2.h" 28 #include "crypto/sha2.h"
25 #include "testing/gmock/include/gmock/gmock.h" 29 #include "testing/gmock/include/gmock/gmock.h"
26 #include "testing/gtest/include/gtest/gtest.h" 30 #include "testing/gtest/include/gtest/gtest.h"
27 31
28 using base::ASCIIToUTF16; 32 using base::ASCIIToUTF16;
29 using ::testing::Return; 33 using ::testing::Return;
30 34
31 35 static const uint32_t kMurmurHash3Seed = 2777808611U;
32 static const uint32 kMurmurHash3Seed = 2777808611U;
33 36
34 namespace safe_browsing { 37 namespace safe_browsing {
35 38
36 class PhishingTermFeatureExtractorTest : public ::testing::Test { 39 class PhishingTermFeatureExtractorTest : public ::testing::Test {
37 protected: 40 protected:
38 void SetUp() override { 41 void SetUp() override {
39 base::hash_set<std::string> terms; 42 base::hash_set<std::string> terms;
40 terms.insert("one"); 43 terms.insert("one");
41 terms.insert("one one"); 44 terms.insert("one one");
42 terms.insert("two"); 45 terms.insert("two");
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
84 kMurmurHash3Seed, 87 kMurmurHash3Seed,
85 max_shingles_per_page, 88 max_shingles_per_page,
86 4 /* shingle_size */, 89 4 /* shingle_size */,
87 &clock_)); 90 &clock_));
88 } 91 }
89 92
90 // Runs the TermFeatureExtractor on |page_text|, waiting for the 93 // Runs the TermFeatureExtractor on |page_text|, waiting for the
91 // completion callback. Returns the success boolean from the callback. 94 // completion callback. Returns the success boolean from the callback.
92 bool ExtractFeatures(const base::string16* page_text, 95 bool ExtractFeatures(const base::string16* page_text,
93 FeatureMap* features, 96 FeatureMap* features,
94 std::set<uint32>* shingle_hashes) { 97 std::set<uint32_t>* shingle_hashes) {
95 success_ = false; 98 success_ = false;
96 extractor_->ExtractFeatures( 99 extractor_->ExtractFeatures(
97 page_text, 100 page_text,
98 features, 101 features,
99 shingle_hashes, 102 shingle_hashes,
100 base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone, 103 base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone,
101 base::Unretained(this))); 104 base::Unretained(this)));
102 msg_loop_.Run(); 105 msg_loop_.Run();
103 return success_; 106 return success_;
104 } 107 }
105 108
106 void PartialExtractFeatures(const base::string16* page_text, 109 void PartialExtractFeatures(const base::string16* page_text,
107 FeatureMap* features, 110 FeatureMap* features,
108 std::set<uint32>* shingle_hashes) { 111 std::set<uint32_t>* shingle_hashes) {
109 extractor_->ExtractFeatures( 112 extractor_->ExtractFeatures(
110 page_text, 113 page_text,
111 features, 114 features,
112 shingle_hashes, 115 shingle_hashes,
113 base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone, 116 base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone,
114 base::Unretained(this))); 117 base::Unretained(this)));
115 msg_loop_.task_runner()->PostTask( 118 msg_loop_.task_runner()->PostTask(
116 FROM_HERE, base::Bind(&PhishingTermFeatureExtractorTest::QuitExtraction, 119 FROM_HERE, base::Bind(&PhishingTermFeatureExtractorTest::QuitExtraction,
117 base::Unretained(this))); 120 base::Unretained(this)));
118 msg_loop_.RunUntilIdle(); 121 msg_loop_.RunUntilIdle();
119 } 122 }
120 123
121 // Completion callback for feature extraction. 124 // Completion callback for feature extraction.
122 void ExtractionDone(bool success) { 125 void ExtractionDone(bool success) {
123 success_ = success; 126 success_ = success;
124 msg_loop_.QuitWhenIdle(); 127 msg_loop_.QuitWhenIdle();
125 } 128 }
126 129
127 void QuitExtraction() { 130 void QuitExtraction() {
128 extractor_->CancelPendingExtraction(); 131 extractor_->CancelPendingExtraction();
129 msg_loop_.QuitWhenIdle(); 132 msg_loop_.QuitWhenIdle();
130 } 133 }
131 134
132 base::MessageLoop msg_loop_; 135 base::MessageLoop msg_loop_;
133 MockFeatureExtractorClock clock_; 136 MockFeatureExtractorClock clock_;
134 scoped_ptr<PhishingTermFeatureExtractor> extractor_; 137 scoped_ptr<PhishingTermFeatureExtractor> extractor_;
135 base::hash_set<std::string> term_hashes_; 138 base::hash_set<std::string> term_hashes_;
136 base::hash_set<uint32> word_hashes_; 139 base::hash_set<uint32_t> word_hashes_;
137 bool success_; // holds the success value from ExtractFeatures 140 bool success_; // holds the success value from ExtractFeatures
138 }; 141 };
139 142
140 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { 143 TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) {
141 // This test doesn't exercise the extraction timing. 144 // This test doesn't exercise the extraction timing.
142 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 145 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
143 146
144 base::string16 page_text = ASCIIToUTF16("blah"); 147 base::string16 page_text = ASCIIToUTF16("blah");
145 FeatureMap expected_features; // initially empty 148 FeatureMap expected_features; // initially empty
146 std::set<uint32> expected_shingle_hashes; 149 std::set<uint32_t> expected_shingle_hashes;
147 150
148 FeatureMap features; 151 FeatureMap features;
149 std::set<uint32> shingle_hashes; 152 std::set<uint32_t> shingle_hashes;
150 ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes)); 153 ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
151 ExpectFeatureMapsAreEqual(features, expected_features); 154 ExpectFeatureMapsAreEqual(features, expected_features);
152 EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes)); 155 EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));
153 156
154 page_text = ASCIIToUTF16("one one"); 157 page_text = ASCIIToUTF16("one one");
155 expected_features.Clear(); 158 expected_features.Clear();
156 expected_features.AddBooleanFeature(features::kPageTerm + 159 expected_features.AddBooleanFeature(features::kPageTerm +
157 std::string("one")); 160 std::string("one"));
158 expected_features.AddBooleanFeature(features::kPageTerm + 161 expected_features.AddBooleanFeature(features::kPageTerm +
159 std::string("one one")); 162 std::string("one one"));
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 expected_features.Clear(); 235 expected_features.Clear();
233 expected_shingle_hashes.clear(); 236 expected_shingle_hashes.clear();
234 expected_shingle_hashes.insert(MurmurHash3String("this page has way ", 237 expected_shingle_hashes.insert(MurmurHash3String("this page has way ",
235 kMurmurHash3Seed)); 238 kMurmurHash3Seed));
236 expected_shingle_hashes.insert(MurmurHash3String("page has way too ", 239 expected_shingle_hashes.insert(MurmurHash3String("page has way too ",
237 kMurmurHash3Seed)); 240 kMurmurHash3Seed));
238 expected_shingle_hashes.insert(MurmurHash3String("has way too many ", 241 expected_shingle_hashes.insert(MurmurHash3String("has way too many ",
239 kMurmurHash3Seed)); 242 kMurmurHash3Seed));
240 expected_shingle_hashes.insert(MurmurHash3String("way too many words ", 243 expected_shingle_hashes.insert(MurmurHash3String("way too many words ",
241 kMurmurHash3Seed)); 244 kMurmurHash3Seed));
242 std::set<uint32>::iterator it = expected_shingle_hashes.end(); 245 std::set<uint32_t>::iterator it = expected_shingle_hashes.end();
243 expected_shingle_hashes.erase(--it); 246 expected_shingle_hashes.erase(--it);
244 247
245 features.Clear(); 248 features.Clear();
246 shingle_hashes.clear(); 249 shingle_hashes.clear();
247 ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes)); 250 ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
248 ExpectFeatureMapsAreEqual(features, expected_features); 251 ExpectFeatureMapsAreEqual(features, expected_features);
249 EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes)); 252 EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));
250 253
251 // Test with empty page text. 254 // Test with empty page text.
252 page_text = base::string16(); 255 page_text = base::string16();
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
324 // Time check after the next 5 words. 327 // Time check after the next 5 words.
325 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(28))) 328 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(28)))
326 // A final check for the histograms. 329 // A final check for the histograms.
327 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30))); 330 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)));
328 331
329 FeatureMap expected_features; 332 FeatureMap expected_features;
330 expected_features.AddBooleanFeature(features::kPageTerm + 333 expected_features.AddBooleanFeature(features::kPageTerm +
331 std::string("one")); 334 std::string("one"));
332 expected_features.AddBooleanFeature(features::kPageTerm + 335 expected_features.AddBooleanFeature(features::kPageTerm +
333 std::string("two")); 336 std::string("two"));
334 std::set<uint32> expected_shingle_hashes; 337 std::set<uint32_t> expected_shingle_hashes;
335 expected_shingle_hashes.insert( 338 expected_shingle_hashes.insert(
336 MurmurHash3String("one 0 1 2 ", kMurmurHash3Seed)); 339 MurmurHash3String("one 0 1 2 ", kMurmurHash3Seed));
337 expected_shingle_hashes.insert( 340 expected_shingle_hashes.insert(
338 MurmurHash3String("0 1 2 3 ", kMurmurHash3Seed)); 341 MurmurHash3String("0 1 2 3 ", kMurmurHash3Seed));
339 expected_shingle_hashes.insert( 342 expected_shingle_hashes.insert(
340 MurmurHash3String("1 2 3 4 ", kMurmurHash3Seed)); 343 MurmurHash3String("1 2 3 4 ", kMurmurHash3Seed));
341 expected_shingle_hashes.insert( 344 expected_shingle_hashes.insert(
342 MurmurHash3String("2 3 4 5 ", kMurmurHash3Seed)); 345 MurmurHash3String("2 3 4 5 ", kMurmurHash3Seed));
343 expected_shingle_hashes.insert( 346 expected_shingle_hashes.insert(
344 MurmurHash3String("3 4 5 6 ", kMurmurHash3Seed)); 347 MurmurHash3String("3 4 5 6 ", kMurmurHash3Seed));
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
381 expected_shingle_hashes.insert( 384 expected_shingle_hashes.insert(
382 MurmurHash3String("22 23 24 25 ", kMurmurHash3Seed)); 385 MurmurHash3String("22 23 24 25 ", kMurmurHash3Seed));
383 expected_shingle_hashes.insert( 386 expected_shingle_hashes.insert(
384 MurmurHash3String("23 24 25 26 ", kMurmurHash3Seed)); 387 MurmurHash3String("23 24 25 26 ", kMurmurHash3Seed));
385 expected_shingle_hashes.insert( 388 expected_shingle_hashes.insert(
386 MurmurHash3String("24 25 26 27 ", kMurmurHash3Seed)); 389 MurmurHash3String("24 25 26 27 ", kMurmurHash3Seed));
387 expected_shingle_hashes.insert( 390 expected_shingle_hashes.insert(
388 MurmurHash3String("25 26 27 two ", kMurmurHash3Seed)); 391 MurmurHash3String("25 26 27 two ", kMurmurHash3Seed));
389 392
390 FeatureMap features; 393 FeatureMap features;
391 std::set<uint32> shingle_hashes; 394 std::set<uint32_t> shingle_hashes;
392 ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes)); 395 ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
393 ExpectFeatureMapsAreEqual(features, expected_features); 396 ExpectFeatureMapsAreEqual(features, expected_features);
394 EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes)); 397 EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));
395 // Make sure none of the mock expectations carry over to the next test. 398 // Make sure none of the mock expectations carry over to the next test.
396 ::testing::Mock::VerifyAndClearExpectations(&clock_); 399 ::testing::Mock::VerifyAndClearExpectations(&clock_);
397 400
398 // Now repeat the test with the same text, but advance the clock faster so 401 // Now repeat the test with the same text, but advance the clock faster so
399 // that the extraction time exceeds the maximum total time for the feature 402 // that the extraction time exceeds the maximum total time for the feature
400 // extractor. Extraction should fail. Note that this assumes 403 // extractor. Extraction should fail. Note that this assumes
401 // kMaxTotalTimeMs = 500. 404 // kMaxTotalTimeMs = 500.
(...skipping 29 matching lines...) Expand all
431 .WillOnce(Return(now)) 434 .WillOnce(Return(now))
432 // Time check at the start of the first chunk of work. 435 // Time check at the start of the first chunk of work.
433 .WillOnce(Return(now)) 436 .WillOnce(Return(now))
434 // Time check after the first 5 words. 437 // Time check after the first 5 words.
435 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(7))) 438 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(7)))
436 // Time check after the next 5 words. This should be greater than 439 // Time check after the next 5 words. This should be greater than
437 // kMaxTimePerChunkMs so that we stop and schedule extraction for later. 440 // kMaxTimePerChunkMs so that we stop and schedule extraction for later.
438 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(14))); 441 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(14)));
439 442
440 FeatureMap features; 443 FeatureMap features;
441 std::set<uint32> shingle_hashes; 444 std::set<uint32_t> shingle_hashes;
442 // Extract first 10 words then stop. 445 // Extract first 10 words then stop.
443 PartialExtractFeatures(page_text.get(), &features, &shingle_hashes); 446 PartialExtractFeatures(page_text.get(), &features, &shingle_hashes);
444 447
445 page_text.reset(new base::string16()); 448 page_text.reset(new base::string16());
446 for (int i = 30; i < 58; ++i) { 449 for (int i = 30; i < 58; ++i) {
447 page_text->append(ASCIIToUTF16(base::StringPrintf("%d ", i))); 450 page_text->append(ASCIIToUTF16(base::StringPrintf("%d ", i)));
448 } 451 }
449 page_text->append(ASCIIToUTF16("multi word test ")); 452 page_text->append(ASCIIToUTF16("multi word test "));
450 features.Clear(); 453 features.Clear();
451 shingle_hashes.clear(); 454 shingle_hashes.clear();
452 455
453 // This part doesn't exercise the extraction timing. 456 // This part doesn't exercise the extraction timing.
454 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 457 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
455 458
456 // Now extract normally and make sure nothing breaks. 459 // Now extract normally and make sure nothing breaks.
457 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features, &shingle_hashes)); 460 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features, &shingle_hashes));
458 461
459 FeatureMap expected_features; 462 FeatureMap expected_features;
460 expected_features.AddBooleanFeature(features::kPageTerm + 463 expected_features.AddBooleanFeature(features::kPageTerm +
461 std::string("multi word test")); 464 std::string("multi word test"));
462 ExpectFeatureMapsAreEqual(features, expected_features); 465 ExpectFeatureMapsAreEqual(features, expected_features);
463 } 466 }
464 467
465 } // namespace safe_browsing 468 } // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698