| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" | |
| 6 | |
| 7 #include <stddef.h> | |
| 8 #include <stdint.h> | |
| 9 | |
| 10 #include <memory> | |
| 11 #include <string> | |
| 12 | |
| 13 #include "base/bind.h" | |
| 14 #include "base/callback.h" | |
| 15 #include "base/containers/hash_tables.h" | |
| 16 #include "base/location.h" | |
| 17 #include "base/message_loop/message_loop.h" | |
| 18 #include "base/run_loop.h" | |
| 19 #include "base/single_thread_task_runner.h" | |
| 20 #include "base/strings/string16.h" | |
| 21 #include "base/strings/stringprintf.h" | |
| 22 #include "base/strings/utf_string_conversions.h" | |
| 23 #include "base/time/time.h" | |
| 24 #include "build/build_config.h" | |
| 25 #include "chrome/renderer/safe_browsing/features.h" | |
| 26 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" | |
| 27 #include "chrome/renderer/safe_browsing/murmurhash3_util.h" | |
| 28 #include "chrome/renderer/safe_browsing/test_utils.h" | |
| 29 #include "crypto/sha2.h" | |
| 30 #include "testing/gmock/include/gmock/gmock.h" | |
| 31 #include "testing/gtest/include/gtest/gtest.h" | |
| 32 | |
| 33 using base::ASCIIToUTF16; | |
| 34 using ::testing::Return; | |
| 35 | |
| 36 static const uint32_t kMurmurHash3Seed = 2777808611U; | |
| 37 | |
| 38 namespace safe_browsing { | |
| 39 | |
| 40 class PhishingTermFeatureExtractorTest : public ::testing::Test { | |
| 41 protected: | |
| 42 void SetUp() override { | |
| 43 base::hash_set<std::string> terms; | |
| 44 terms.insert("one"); | |
| 45 terms.insert("one one"); | |
| 46 terms.insert("two"); | |
| 47 terms.insert("multi word test"); | |
| 48 terms.insert("capitalization"); | |
| 49 terms.insert("space"); | |
| 50 terms.insert("separator"); | |
| 51 terms.insert("punctuation"); | |
| 52 // Chinese (translation of "hello") | |
| 53 terms.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); | |
| 54 // Chinese (translation of "goodbye") | |
| 55 terms.insert("\xe5\x86\x8d\xe8\xa7\x81"); | |
| 56 | |
| 57 for (base::hash_set<std::string>::iterator it = terms.begin(); | |
| 58 it != terms.end(); ++it) { | |
| 59 term_hashes_.insert(crypto::SHA256HashString(*it)); | |
| 60 } | |
| 61 | |
| 62 base::hash_set<std::string> words; | |
| 63 words.insert("one"); | |
| 64 words.insert("two"); | |
| 65 words.insert("multi"); | |
| 66 words.insert("word"); | |
| 67 words.insert("test"); | |
| 68 words.insert("capitalization"); | |
| 69 words.insert("space"); | |
| 70 words.insert("separator"); | |
| 71 words.insert("punctuation"); | |
| 72 words.insert("\xe4\xbd\xa0\xe5\xa5\xbd"); | |
| 73 words.insert("\xe5\x86\x8d\xe8\xa7\x81"); | |
| 74 | |
| 75 for (base::hash_set<std::string>::iterator it = words.begin(); | |
| 76 it != words.end(); ++it) { | |
| 77 word_hashes_.insert(MurmurHash3String(*it, kMurmurHash3Seed)); | |
| 78 } | |
| 79 | |
| 80 ResetExtractor(3 /* max shingles per page */); | |
| 81 } | |
| 82 | |
| 83 void ResetExtractor(size_t max_shingles_per_page) { | |
| 84 extractor_.reset(new PhishingTermFeatureExtractor( | |
| 85 &term_hashes_, | |
| 86 &word_hashes_, | |
| 87 3 /* max_words_per_term */, | |
| 88 kMurmurHash3Seed, | |
| 89 max_shingles_per_page, | |
| 90 4 /* shingle_size */, | |
| 91 &clock_)); | |
| 92 } | |
| 93 | |
| 94 // Runs the TermFeatureExtractor on |page_text|, waiting for the | |
| 95 // completion callback. Returns the success boolean from the callback. | |
| 96 bool ExtractFeatures(const base::string16* page_text, | |
| 97 FeatureMap* features, | |
| 98 std::set<uint32_t>* shingle_hashes) { | |
| 99 success_ = false; | |
| 100 extractor_->ExtractFeatures( | |
| 101 page_text, | |
| 102 features, | |
| 103 shingle_hashes, | |
| 104 base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone, | |
| 105 base::Unretained(this))); | |
| 106 base::RunLoop().Run(); | |
| 107 return success_; | |
| 108 } | |
| 109 | |
| 110 void PartialExtractFeatures(const base::string16* page_text, | |
| 111 FeatureMap* features, | |
| 112 std::set<uint32_t>* shingle_hashes) { | |
| 113 extractor_->ExtractFeatures( | |
| 114 page_text, | |
| 115 features, | |
| 116 shingle_hashes, | |
| 117 base::Bind(&PhishingTermFeatureExtractorTest::ExtractionDone, | |
| 118 base::Unretained(this))); | |
| 119 msg_loop_.task_runner()->PostTask( | |
| 120 FROM_HERE, base::Bind(&PhishingTermFeatureExtractorTest::QuitExtraction, | |
| 121 base::Unretained(this))); | |
| 122 base::RunLoop().RunUntilIdle(); | |
| 123 } | |
| 124 | |
| 125 // Completion callback for feature extraction. | |
| 126 void ExtractionDone(bool success) { | |
| 127 success_ = success; | |
| 128 msg_loop_.QuitWhenIdle(); | |
| 129 } | |
| 130 | |
| 131 void QuitExtraction() { | |
| 132 extractor_->CancelPendingExtraction(); | |
| 133 msg_loop_.QuitWhenIdle(); | |
| 134 } | |
| 135 | |
| 136 base::MessageLoop msg_loop_; | |
| 137 MockFeatureExtractorClock clock_; | |
| 138 std::unique_ptr<PhishingTermFeatureExtractor> extractor_; | |
| 139 base::hash_set<std::string> term_hashes_; | |
| 140 base::hash_set<uint32_t> word_hashes_; | |
| 141 bool success_; // holds the success value from ExtractFeatures | |
| 142 }; | |
| 143 | |
TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) {
  // This test doesn't exercise the extraction timing.
  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));

  // A page with no matching terms and fewer words than the shingle size
  // produces no features and no shingle hashes.
  base::string16 page_text = ASCIIToUTF16("blah");
  FeatureMap expected_features;  // initially empty
  std::set<uint32_t> expected_shingle_hashes;

  FeatureMap features;
  std::set<uint32_t> shingle_hashes;
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

  // Overlapping terms: "one one" should yield both the single-word term
  // "one" and the two-word term "one one".
  page_text = ASCIIToUTF16("one one");
  expected_features.Clear();
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("one"));
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("one one"));
  expected_shingle_hashes.clear();

  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

  // A multi-word term surrounded by filler words; shingles are every run of
  // 4 consecutive words (shingle_size = 4), capped at 3 per page.
  page_text = ASCIIToUTF16("bla bla multi word test bla");
  expected_features.Clear();
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("multi word test"));
  expected_shingle_hashes.clear();
  expected_shingle_hashes.insert(MurmurHash3String("bla bla multi word ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("bla multi word test ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("multi word test bla ",
                                                   kMurmurHash3Seed));

  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

  // This text has all of the words for one of the terms, but they are
  // not in the correct order.
  page_text = ASCIIToUTF16("bla bla test word multi bla");
  expected_features.Clear();
  expected_shingle_hashes.clear();
  expected_shingle_hashes.insert(MurmurHash3String("bla bla test word ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("bla test word multi ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("test word multi bla ",
                                                   kMurmurHash3Seed));

  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

  // Test various separators: words should be lowercased and split on
  // non-alphanumeric characters before matching.
  page_text = ASCIIToUTF16("Capitalization plus non-space\n"
                           "separator... punctuation!");
  expected_features.Clear();
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("capitalization"));
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("space"));
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("separator"));
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("punctuation"));
  expected_shingle_hashes.clear();
  expected_shingle_hashes.insert(
      MurmurHash3String("capitalization plus non space ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("plus non space separator ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("non space separator punctuation ", kMurmurHash3Seed));

  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

  // Test a page that produces more shingles than the cap: only the 3 minimum
  // hash values should be kept (max_shingles_per_page = 3).
  page_text = ASCIIToUTF16("This page has way too many words.");
  expected_features.Clear();
  expected_shingle_hashes.clear();
  expected_shingle_hashes.insert(MurmurHash3String("this page has way ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("page has way too ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("has way too many ",
                                                   kMurmurHash3Seed));
  expected_shingle_hashes.insert(MurmurHash3String("way too many words ",
                                                   kMurmurHash3Seed));
  // Drop the largest of the four hashes, mirroring the extractor's
  // keep-the-minimum-hashes behavior.
  std::set<uint32_t>::iterator it = expected_shingle_hashes.end();
  expected_shingle_hashes.erase(--it);

  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

  // Test with empty page text.
  page_text = base::string16();
  expected_features.Clear();
  expected_shingle_hashes.clear();
  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));

#if !defined(OS_ANDROID)
  // The test code is disabled due to http://crbug.com/392234
  // The client-side detection feature is not enabled on Android yet.
  // If we decided to enable the feature, we need to fix the bug first.

  // Chinese translation of the phrase "hello goodbye hello goodbye". This tests
  // that we can correctly separate terms in languages that don't use spaces.
  page_text =
      base::UTF8ToUTF16("\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x86\x8d\xe8\xa7\x81"
                        "\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x86\x8d\xe8\xa7\x81");
  expected_features.Clear();
  expected_features.AddBooleanFeature(
      features::kPageTerm + std::string("\xe4\xbd\xa0\xe5\xa5\xbd"));
  expected_features.AddBooleanFeature(
      features::kPageTerm + std::string("\xe5\x86\x8d\xe8\xa7\x81"));
  expected_shingle_hashes.clear();
  expected_shingle_hashes.insert(MurmurHash3String(
      "\xe4\xbd\xa0\xe5\xa5\xbd \xe5\x86\x8d\xe8\xa7\x81 "
      "\xe4\xbd\xa0\xe5\xa5\xbd \xe5\x86\x8d\xe8\xa7\x81 ", kMurmurHash3Seed));

  features.Clear();
  shingle_hashes.clear();
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));
#endif
}
| 292 | |
// Verifies that extraction that is split across multiple chunks (because a
// chunk exceeded its time budget) still produces the same features as a
// single-pass run, and that exceeding the total time budget fails extraction.
TEST_F(PhishingTermFeatureExtractorTest, Continuation) {
  // For this test, we'll cause the feature extraction to run multiple
  // iterations by incrementing the clock.
  ResetExtractor(200 /* max shingles per page */);

  // This page has a total of 30 words. For the features to be computed
  // correctly, the extractor has to process the entire string of text.
  base::string16 page_text(ASCIIToUTF16("one "));
  for (int i = 0; i < 28; ++i) {
    page_text.append(ASCIIToUTF16(base::StringPrintf("%d ", i)));
  }
  page_text.append(ASCIIToUTF16("two"));

  // Advance the clock 3 ms every 5 words processed, 10 ms between chunks.
  // Note that this assumes kClockCheckGranularity = 5 and
  // kMaxTimePerChunkMs = 10.
  base::TimeTicks now = base::TimeTicks::Now();
  EXPECT_CALL(clock_, Now())
      // Time check at the start of extraction.
      .WillOnce(Return(now))
      // Time check at the start of the first chunk of work.
      .WillOnce(Return(now))
      // Time check after the first 5 words.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(3)))
      // Time check after the next 5 words.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
      // Time check after the next 5 words.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(9)))
      // Time check after the next 5 words. This is over the chunk
      // time limit, so a continuation task will be posted.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
      // Time check at the start of the second chunk of work.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
      // Time check after the next 5 words.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
      // Time check after the next 5 words.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(28)))
      // A final check for the histograms.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)));

  // Both "one" (first word) and "two" (last word) must be found, proving the
  // whole page was processed despite the chunk boundary.
  FeatureMap expected_features;
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("one"));
  expected_features.AddBooleanFeature(features::kPageTerm +
                                      std::string("two"));
  // Every 4-word window of the 30-word page (27 shingles, under the cap of
  // 200), again spanning the chunk boundary.
  std::set<uint32_t> expected_shingle_hashes;
  expected_shingle_hashes.insert(
      MurmurHash3String("one 0 1 2 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("0 1 2 3 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("1 2 3 4 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("2 3 4 5 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("3 4 5 6 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("4 5 6 7 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("5 6 7 8 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("6 7 8 9 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("7 8 9 10 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("8 9 10 11 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("9 10 11 12 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("10 11 12 13 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("11 12 13 14 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("12 13 14 15 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("13 14 15 16 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("14 15 16 17 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("15 16 17 18 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("16 17 18 19 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("17 18 19 20 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("18 19 20 21 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("19 20 21 22 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("20 21 22 23 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("21 22 23 24 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("22 23 24 25 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("23 24 25 26 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("24 25 26 27 ", kMurmurHash3Seed));
  expected_shingle_hashes.insert(
      MurmurHash3String("25 26 27 two ", kMurmurHash3Seed));

  FeatureMap features;
  std::set<uint32_t> shingle_hashes;
  ASSERT_TRUE(ExtractFeatures(&page_text, &features, &shingle_hashes));
  ExpectFeatureMapsAreEqual(features, expected_features);
  EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));
  // Make sure none of the mock expectations carry over to the next test.
  ::testing::Mock::VerifyAndClearExpectations(&clock_);

  // Now repeat the test with the same text, but advance the clock faster so
  // that the extraction time exceeds the maximum total time for the feature
  // extractor. Extraction should fail. Note that this assumes
  // kMaxTotalTimeMs = 500.
  EXPECT_CALL(clock_, Now())
      // Time check at the start of extraction.
      .WillOnce(Return(now))
      // Time check at the start of the first chunk of work.
      .WillOnce(Return(now))
      // Time check after the first 5 words,
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
      // Time check at the start of the second chunk of work.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
      // Time check after the next 5 words. This is over the limit.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
      // A final time check for the histograms.
      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));

  features.Clear();
  shingle_hashes.clear();
  EXPECT_FALSE(ExtractFeatures(&page_text, &features, &shingle_hashes));
}
| 424 | |
| 425 TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) { | |
| 426 std::unique_ptr<base::string16> page_text( | |
| 427 new base::string16(ASCIIToUTF16("one "))); | |
| 428 for (int i = 0; i < 28; ++i) { | |
| 429 page_text->append(ASCIIToUTF16(base::StringPrintf("%d ", i))); | |
| 430 } | |
| 431 | |
| 432 base::TimeTicks now = base::TimeTicks::Now(); | |
| 433 EXPECT_CALL(clock_, Now()) | |
| 434 // Time check at the start of extraction. | |
| 435 .WillOnce(Return(now)) | |
| 436 // Time check at the start of the first chunk of work. | |
| 437 .WillOnce(Return(now)) | |
| 438 // Time check after the first 5 words. | |
| 439 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(7))) | |
| 440 // Time check after the next 5 words. This should be greater than | |
| 441 // kMaxTimePerChunkMs so that we stop and schedule extraction for later. | |
| 442 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(14))); | |
| 443 | |
| 444 FeatureMap features; | |
| 445 std::set<uint32_t> shingle_hashes; | |
| 446 // Extract first 10 words then stop. | |
| 447 PartialExtractFeatures(page_text.get(), &features, &shingle_hashes); | |
| 448 | |
| 449 page_text.reset(new base::string16()); | |
| 450 for (int i = 30; i < 58; ++i) { | |
| 451 page_text->append(ASCIIToUTF16(base::StringPrintf("%d ", i))); | |
| 452 } | |
| 453 page_text->append(ASCIIToUTF16("multi word test ")); | |
| 454 features.Clear(); | |
| 455 shingle_hashes.clear(); | |
| 456 | |
| 457 // This part doesn't exercise the extraction timing. | |
| 458 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); | |
| 459 | |
| 460 // Now extract normally and make sure nothing breaks. | |
| 461 EXPECT_TRUE(ExtractFeatures(page_text.get(), &features, &shingle_hashes)); | |
| 462 | |
| 463 FeatureMap expected_features; | |
| 464 expected_features.AddBooleanFeature(features::kPageTerm + | |
| 465 std::string("multi word test")); | |
| 466 ExpectFeatureMapsAreEqual(features, expected_features); | |
| 467 } | |
| 468 | |
| 469 } // namespace safe_browsing | |
| OLD | NEW |