OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/scorer.h" | 5 #include "chrome/renderer/safe_browsing/scorer.h" |
6 | 6 |
7 #include "base/file_path.h" | 7 #include "base/file_path.h" |
8 #include "base/file_util.h" | 8 #include "base/file_util.h" |
9 #include "base/format_macros.h" | 9 #include "base/format_macros.h" |
10 #include "base/hash_tables.h" | 10 #include "base/hash_tables.h" |
(...skipping 13 matching lines...) Expand all Loading... |
24 virtual void SetUp() { | 24 virtual void SetUp() { |
25 // Setup a simple model. Note that the scorer does not care about | 25 // Setup a simple model. Note that the scorer does not care about |
26 // how features are encoded so we use readable strings here to make | 26 // how features are encoded so we use readable strings here to make |
27 // the test simpler to follow. | 27 // the test simpler to follow. |
28 model_.Clear(); | 28 model_.Clear(); |
29 model_.add_hashes("feature1"); | 29 model_.add_hashes("feature1"); |
30 model_.add_hashes("feature2"); | 30 model_.add_hashes("feature2"); |
31 model_.add_hashes("feature3"); | 31 model_.add_hashes("feature3"); |
32 model_.add_hashes("token one"); | 32 model_.add_hashes("token one"); |
33 model_.add_hashes("token two"); | 33 model_.add_hashes("token two"); |
34 model_.add_hashes("token"); | |
35 model_.add_hashes("one"); | |
36 model_.add_hashes("two"); | |
37 | 34 |
// The add_feature() / add_page_term() arguments below are zero-based indices
// into the hashes list added above (see the trailing comments on each call).
38 ClientSideModel::Rule* rule; | 35 ClientSideModel::Rule* rule; |
// First rule has no features: this is the "empty rule" whose weight (0.5)
// the ComputeScore test expects to be part of every score.
39 rule = model_.add_rule(); | 36 rule = model_.add_rule(); |
40 rule->set_weight(0.5); | 37 rule->set_weight(0.5); |
41 | 38 |
42 rule = model_.add_rule(); | 39 rule = model_.add_rule(); |
43 rule->add_feature(0); // feature1 | 40 rule->add_feature(0); // feature1 |
44 rule->set_weight(2.0); | 41 rule->set_weight(2.0); |
45 | 42 |
46 rule = model_.add_rule(); | 43 rule = model_.add_rule(); |
47 rule->add_feature(0); // feature1 | 44 rule->add_feature(0); // feature1 |
48 rule->add_feature(1); // feature2 | 45 rule->add_feature(1); // feature2 |
49 rule->set_weight(3.0); | 46 rule->set_weight(3.0); |
50 | 47 |
51 model_.add_page_term(3); // token one | 48 model_.add_page_term(3); // token one |
52 model_.add_page_term(4); // token two | 49 model_.add_page_term(4); // token two |
53 | 50 |
54 model_.add_page_word(5); // token | 51 // These will be murmur3 hashes, but for this test it's not necessary |
55 model_.add_page_word(6); // one | 52 // that the hashes correspond to actual words. |
56 model_.add_page_word(7); // two | 53 model_.add_page_word(1000U); |
| 54 model_.add_page_word(2000U); |
| 55 model_.add_page_word(3000U); |
57 | 56 |
58 model_.set_max_words_per_term(2); | 57 model_.set_max_words_per_term(2); |
| 58 model_.set_murmur_hash_seed(12345U); |
59 } | 59 } |
60 | 60 |
61 ClientSideModel model_; | 61 ClientSideModel model_; |
62 }; | 62 }; |
63 | 63 |
64 TEST_F(PhishingScorerTest, HasValidModel) { | 64 TEST_F(PhishingScorerTest, HasValidModel) { |
65 scoped_ptr<Scorer> scorer; | 65 scoped_ptr<Scorer> scorer; |
66 scorer.reset(Scorer::Create(model_.SerializeAsString())); | 66 scorer.reset(Scorer::Create(model_.SerializeAsString())); |
67 EXPECT_TRUE(scorer.get() != NULL); | 67 EXPECT_TRUE(scorer.get() != NULL); |
68 | 68 |
(...skipping 13 matching lines...) Expand all Loading... |
82 base::hash_set<std::string> expected_page_terms; | 82 base::hash_set<std::string> expected_page_terms; |
83 expected_page_terms.insert("token one"); | 83 expected_page_terms.insert("token one"); |
84 expected_page_terms.insert("token two"); | 84 expected_page_terms.insert("token two"); |
85 EXPECT_THAT(scorer->page_terms(), | 85 EXPECT_THAT(scorer->page_terms(), |
86 ::testing::ContainerEq(expected_page_terms)); | 86 ::testing::ContainerEq(expected_page_terms)); |
87 } | 87 } |
88 | 88 |
89 TEST_F(PhishingScorerTest, PageWords) { | 89 TEST_F(PhishingScorerTest, PageWords) { |
// Checks that page_words() on a scorer built from the serialized model
// matches what SetUp() registered through add_page_word(): word strings in
// the OLD column, raw uint32 hash values in the NEW column. The NEW column
// additionally checks that max_words_per_term and the murmur hash seed are
// exposed by the scorer with the values set in SetUp().
// NOTE(review): the model setter is set_murmur_hash_seed() while the scorer
// accessor is murmurhash3_seed() -- confirm both names match their headers.
90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | 90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); |
91 ASSERT_TRUE(scorer.get()); | 91 ASSERT_TRUE(scorer.get()); |
92 base::hash_set<std::string> expected_page_words; | 92 base::hash_set<uint32> expected_page_words; |
93 expected_page_words.insert("token"); | 93 expected_page_words.insert(1000U); |
94 expected_page_words.insert("one"); | 94 expected_page_words.insert(2000U); |
95 expected_page_words.insert("two"); | 95 expected_page_words.insert(3000U); |
96 EXPECT_THAT(scorer->page_words(), | 96 EXPECT_THAT(scorer->page_words(), |
97 ::testing::ContainerEq(expected_page_words)); | 97 ::testing::ContainerEq(expected_page_words)); |
| 98 EXPECT_EQ(2U, scorer->max_words_per_term()); |
| 99 EXPECT_EQ(12345U, scorer->murmurhash3_seed()); |
98 } | 100 } |
99 | 101 |
100 TEST_F(PhishingScorerTest, ComputeScore) { | 102 TEST_F(PhishingScorerTest, ComputeScore) { |
101 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); | 103 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); |
102 ASSERT_TRUE(scorer.get()); | 104 ASSERT_TRUE(scorer.get()); |
103 | 105 |
104 // An empty feature map should match the empty rule. | 106 // An empty feature map should match the empty rule. |
105 FeatureMap features; | 107 FeatureMap features; |
106 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) | 108 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) |
107 // => 0.62245933120185459 | 109 // => 0.62245933120185459 |
108 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); | 110 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); |
109 // Same if the feature does not match any rule. | 111 // Same if the feature does not match any rule. |
110 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); | 112 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); |
111 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); | 113 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); |
112 | 114 |
113 // Feature 1 matches which means that the logodds will be: | 115 // Feature 1 matches which means that the logodds will be: |
114 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 | 116 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 |
115 // => p = 0.6899744811276125 | 117 // => p = 0.6899744811276125 |
116 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); | 118 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); |
117 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); | 119 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); |
118 | 120 |
119 // Now, both feature 1 and feature 2 match. Expected logodds: | 121 // Now, both feature 1 and feature 2 match. Expected logodds: |
120 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + | 122 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + |
121 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25 | 123 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25 |
122 // => p = exp(1.25) / (exp(1.25) + 1) = 0.77729986117469119 | 124 // => p = exp(1.25) / (exp(1.25) + 1) = 0.77729986117469119 |
123 EXPECT_TRUE(features.AddBooleanFeature("feature2")); | 125 EXPECT_TRUE(features.AddBooleanFeature("feature2")); |
124 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); | 126 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); |
125 } | 127 } |
126 } // namespace safe_browsing | 128 } // namespace safe_browsing |
OLD | NEW |