| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/renderer/safe_browsing/scorer.h" | |
| 6 | |
| 7 #include <stdint.h> | |
| 8 | |
| 9 #include <memory> | |
| 10 | |
| 11 #include "base/containers/hash_tables.h" | |
| 12 #include "base/files/file_path.h" | |
| 13 #include "base/files/scoped_temp_dir.h" | |
| 14 #include "base/format_macros.h" | |
| 15 #include "base/message_loop/message_loop.h" | |
| 16 #include "base/threading/thread.h" | |
| 17 #include "chrome/common/safe_browsing/client_model.pb.h" | |
| 18 #include "chrome/renderer/safe_browsing/features.h" | |
| 19 #include "testing/gmock/include/gmock/gmock.h" | |
| 20 #include "testing/gtest/include/gtest/gtest.h" | |
| 21 | |
| 22 namespace safe_browsing { | |
| 23 | |
| 24 class PhishingScorerTest : public ::testing::Test { | |
| 25 protected: | |
| 26 void SetUp() override { | |
| 27 // Setup a simple model. Note that the scorer does not care about | |
| 28 // how features are encoded so we use readable strings here to make | |
| 29 // the test simpler to follow. | |
| 30 model_.Clear(); | |
| 31 model_.add_hashes("feature1"); | |
| 32 model_.add_hashes("feature2"); | |
| 33 model_.add_hashes("feature3"); | |
| 34 model_.add_hashes("token one"); | |
| 35 model_.add_hashes("token two"); | |
| 36 | |
| 37 ClientSideModel::Rule* rule; | |
| 38 rule = model_.add_rule(); | |
| 39 rule->set_weight(0.5); | |
| 40 | |
| 41 rule = model_.add_rule(); | |
| 42 rule->add_feature(0); // feature1 | |
| 43 rule->set_weight(2.0); | |
| 44 | |
| 45 rule = model_.add_rule(); | |
| 46 rule->add_feature(0); // feature1 | |
| 47 rule->add_feature(1); // feature2 | |
| 48 rule->set_weight(3.0); | |
| 49 | |
| 50 model_.add_page_term(3); // token one | |
| 51 model_.add_page_term(4); // token two | |
| 52 | |
| 53 // These will be murmur3 hashes, but for this test it's not necessary | |
| 54 // that the hashes correspond to actual words. | |
| 55 model_.add_page_word(1000U); | |
| 56 model_.add_page_word(2000U); | |
| 57 model_.add_page_word(3000U); | |
| 58 | |
| 59 model_.set_max_words_per_term(2); | |
| 60 model_.set_murmur_hash_seed(12345U); | |
| 61 model_.set_max_shingles_per_page(10); | |
| 62 model_.set_shingle_size(3); | |
| 63 } | |
| 64 | |
| 65 ClientSideModel model_; | |
| 66 }; | |
| 67 | |
| 68 TEST_F(PhishingScorerTest, HasValidModel) { | |
| 69 std::unique_ptr<Scorer> scorer; | |
| 70 scorer.reset(Scorer::Create(model_.SerializeAsString())); | |
| 71 EXPECT_TRUE(scorer.get() != NULL); | |
| 72 | |
| 73 // Invalid model string. | |
| 74 scorer.reset(Scorer::Create("bogus string")); | |
| 75 EXPECT_FALSE(scorer.get()); | |
| 76 | |
| 77 // Mode is missing a required field. | |
| 78 model_.clear_max_words_per_term(); | |
| 79 scorer.reset(Scorer::Create(model_.SerializePartialAsString())); | |
| 80 EXPECT_FALSE(scorer.get()); | |
| 81 } | |
| 82 | |
| 83 TEST_F(PhishingScorerTest, PageTerms) { | |
| 84 std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | |
| 85 ASSERT_TRUE(scorer.get()); | |
| 86 | |
| 87 // Use std::vector instead of base::hash_set for comparison. | |
| 88 // On Android, EXPECT_THAT(..., ContainerEq(...)) doesn't support | |
| 89 // std::hash_set, but std::vector works fine. | |
| 90 std::vector<std::string> expected_page_terms; | |
| 91 expected_page_terms.push_back("token one"); | |
| 92 expected_page_terms.push_back("token two"); | |
| 93 std::sort(expected_page_terms.begin(), expected_page_terms.end()); | |
| 94 | |
| 95 base::hash_set<std::string> page_terms = scorer->page_terms(); | |
| 96 std::vector<std::string> page_terms_v(page_terms.begin(), page_terms.end()); | |
| 97 std::sort(page_terms_v.begin(), page_terms_v.end()); | |
| 98 | |
| 99 EXPECT_THAT(page_terms_v, ::testing::ContainerEq(expected_page_terms)); | |
| 100 } | |
| 101 | |
| 102 TEST_F(PhishingScorerTest, PageWords) { | |
| 103 std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | |
| 104 ASSERT_TRUE(scorer.get()); | |
| 105 std::vector<uint32_t> expected_page_words; | |
| 106 expected_page_words.push_back(1000U); | |
| 107 expected_page_words.push_back(2000U); | |
| 108 expected_page_words.push_back(3000U); | |
| 109 std::sort(expected_page_words.begin(), expected_page_words.end()); | |
| 110 | |
| 111 base::hash_set<uint32_t> page_words = scorer->page_words(); | |
| 112 std::vector<uint32_t> page_words_v(page_words.begin(), page_words.end()); | |
| 113 std::sort(page_words_v.begin(), page_words_v.end()); | |
| 114 | |
| 115 EXPECT_THAT(page_words_v, ::testing::ContainerEq(expected_page_words)); | |
| 116 | |
| 117 EXPECT_EQ(2U, scorer->max_words_per_term()); | |
| 118 EXPECT_EQ(12345U, scorer->murmurhash3_seed()); | |
| 119 EXPECT_EQ(10U, scorer->max_shingles_per_page()); | |
| 120 EXPECT_EQ(3U, scorer->shingle_size()); | |
| 121 } | |
| 122 | |
| 123 TEST_F(PhishingScorerTest, ComputeScore) { | |
| 124 std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | |
| 125 ASSERT_TRUE(scorer.get()); | |
| 126 | |
| 127 // An empty feature map should match the empty rule. | |
| 128 FeatureMap features; | |
| 129 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) | |
| 130 // => 0.62245933120185459 | |
| 131 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); | |
| 132 // Same if the feature does not match any rule. | |
| 133 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); | |
| 134 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); | |
| 135 | |
| 136 // Feature 1 matches which means that the logodds will be: | |
| 137 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 | |
| 138 // => p = 0.6899744811276125 | |
| 139 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); | |
| 140 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); | |
| 141 | |
| 142 // Now, both feature 1 and feature 2 match. Expected logodds: | |
| 143 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + | |
| 144 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2) weight = 9.8 | |
| 145 // => p = 0.99999627336071584 | |
| 146 EXPECT_TRUE(features.AddBooleanFeature("feature2")); | |
| 147 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); | |
| 148 } | |
| 149 } // namespace safe_browsing | |
| OLD | NEW |