| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/scorer.h" | 5 #include "chrome/renderer/safe_browsing/scorer.h" |
| 6 | 6 |
| 7 #include "base/file_path.h" | 7 #include "base/file_path.h" |
| 8 #include "base/file_util.h" | 8 #include "base/file_util.h" |
| 9 #include "base/format_macros.h" | 9 #include "base/format_macros.h" |
| 10 #include "base/hash_tables.h" | 10 #include "base/hash_tables.h" |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 virtual void SetUp() { | 24 virtual void SetUp() { |
| 25 // Set up a simple model. Note that the scorer does not care about | 25 // Set up a simple model. Note that the scorer does not care about |
| 26 // how features are encoded so we use readable strings here to make | 26 // how features are encoded so we use readable strings here to make |
| 27 // the test simpler to follow. | 27 // the test simpler to follow. |
| 28 model_.Clear(); | 28 model_.Clear(); |
| 29 model_.add_hashes("feature1"); | 29 model_.add_hashes("feature1"); |
| 30 model_.add_hashes("feature2"); | 30 model_.add_hashes("feature2"); |
| 31 model_.add_hashes("feature3"); | 31 model_.add_hashes("feature3"); |
| 32 model_.add_hashes("token one"); | 32 model_.add_hashes("token one"); |
| 33 model_.add_hashes("token two"); | 33 model_.add_hashes("token two"); |
| 34 model_.add_hashes("token"); | |
| 35 model_.add_hashes("one"); | |
| 36 model_.add_hashes("two"); | |
| 37 | 34 |
| 38 ClientSideModel::Rule* rule; | 35 ClientSideModel::Rule* rule; |
| 39 rule = model_.add_rule(); | 36 rule = model_.add_rule(); |
| 40 rule->set_weight(0.5); | 37 rule->set_weight(0.5); |
| 41 | 38 |
| 42 rule = model_.add_rule(); | 39 rule = model_.add_rule(); |
| 43 rule->add_feature(0); // feature1 | 40 rule->add_feature(0); // feature1 |
| 44 rule->set_weight(2.0); | 41 rule->set_weight(2.0); |
| 45 | 42 |
| 46 rule = model_.add_rule(); | 43 rule = model_.add_rule(); |
| 47 rule->add_feature(0); // feature1 | 44 rule->add_feature(0); // feature1 |
| 48 rule->add_feature(1); // feature2 | 45 rule->add_feature(1); // feature2 |
| 49 rule->set_weight(3.0); | 46 rule->set_weight(3.0); |
| 50 | 47 |
| 51 model_.add_page_term(3); // token one | 48 model_.add_page_term(3); // token one |
| 52 model_.add_page_term(4); // token two | 49 model_.add_page_term(4); // token two |
| 53 | 50 |
| 54 model_.add_page_word(5); // token | 51 // These will be murmur3 hashes, but for this test it's not necessary |
| 55 model_.add_page_word(6); // one | 52 // that the hashes correspond to actual words. |
| 56 model_.add_page_word(7); // two | 53 model_.add_page_word(1000U); |
| 54 model_.add_page_word(2000U); |
| 55 model_.add_page_word(3000U); |
| 57 | 56 |
| 58 model_.set_max_words_per_term(2); | 57 model_.set_max_words_per_term(2); |
| 58 model_.set_murmur_hash_seed(12345U); |
| 59 } | 59 } |
| 60 | 60 |
| 61 ClientSideModel model_; | 61 ClientSideModel model_; |
| 62 }; | 62 }; |
| 63 | 63 |
| 64 TEST_F(PhishingScorerTest, HasValidModel) { | 64 TEST_F(PhishingScorerTest, HasValidModel) { |
| 65 scoped_ptr<Scorer> scorer; | 65 scoped_ptr<Scorer> scorer; |
| 66 scorer.reset(Scorer::Create(model_.SerializeAsString())); | 66 scorer.reset(Scorer::Create(model_.SerializeAsString())); |
| 67 EXPECT_TRUE(scorer.get() != NULL); | 67 EXPECT_TRUE(scorer.get() != NULL); |
| 68 | 68 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 82 base::hash_set<std::string> expected_page_terms; | 82 base::hash_set<std::string> expected_page_terms; |
| 83 expected_page_terms.insert("token one"); | 83 expected_page_terms.insert("token one"); |
| 84 expected_page_terms.insert("token two"); | 84 expected_page_terms.insert("token two"); |
| 85 EXPECT_THAT(scorer->page_terms(), | 85 EXPECT_THAT(scorer->page_terms(), |
| 86 ::testing::ContainerEq(expected_page_terms)); | 86 ::testing::ContainerEq(expected_page_terms)); |
| 87 } | 87 } |
| 88 | 88 |
| 89 TEST_F(PhishingScorerTest, PageWords) { | 89 TEST_F(PhishingScorerTest, PageWords) { |
| 90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | 90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); |
| 91 ASSERT_TRUE(scorer.get()); | 91 ASSERT_TRUE(scorer.get()); |
| 92 base::hash_set<std::string> expected_page_words; | 92 base::hash_set<uint32> expected_page_words; |
| 93 expected_page_words.insert("token"); | 93 expected_page_words.insert(1000U); |
| 94 expected_page_words.insert("one"); | 94 expected_page_words.insert(2000U); |
| 95 expected_page_words.insert("two"); | 95 expected_page_words.insert(3000U); |
| 96 EXPECT_THAT(scorer->page_words(), | 96 EXPECT_THAT(scorer->page_words(), |
| 97 ::testing::ContainerEq(expected_page_words)); | 97 ::testing::ContainerEq(expected_page_words)); |
| 98 EXPECT_EQ(2U, scorer->max_words_per_term()); |
| 99 EXPECT_EQ(12345U, scorer->murmurhash3_seed()); |
| 98 } | 100 } |
| 99 | 101 |
| 100 TEST_F(PhishingScorerTest, ComputeScore) { | 102 TEST_F(PhishingScorerTest, ComputeScore) { |
| 101 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | 103 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); |
| 102 ASSERT_TRUE(scorer.get()); | 104 ASSERT_TRUE(scorer.get()); |
| 103 | 105 |
| 104 // An empty feature map should match the empty rule. | 106 // An empty feature map should match the empty rule. |
| 105 FeatureMap features; | 107 FeatureMap features; |
| 106 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) | 108 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) |
| 107 // => 0.62245933120185459 | 109 // => 0.62245933120185459 |
| 108 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); | 110 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); |
| 109 // Same if the feature does not match any rule. | 111 // Same if the feature does not match any rule. |
| 110 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); | 112 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); |
| 111 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); | 113 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); |
| 112 | 114 |
| 113 // Feature 1 matches which means that the logodds will be: | 115 // Feature 1 matches which means that the logodds will be: |
| 114 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 | 116 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 |
| 115 // => p = 0.6899744811276125 | 117 // => p = 0.6899744811276125 |
| 116 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); | 118 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); |
| 117 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); | 119 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); |
| 118 | 120 |
| 119 // Now, both feature 1 and feature 2 match. Expected logodds: | 121 // Now, both feature 1 and feature 2 match. Expected logodds: |
| 120 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + | 122 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + |
| 121 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25 | 123 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25 |
| 122 // => p = 0.77729986117469119 | 124 // => p = 0.77729986117469119 |
| 123 EXPECT_TRUE(features.AddBooleanFeature("feature2")); | 125 EXPECT_TRUE(features.AddBooleanFeature("feature2")); |
| 124 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); | 126 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); |
| 125 } | 127 } |
| 126 } // namespace safe_browsing | 128 } // namespace safe_browsing |
| OLD | NEW |