Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(34)

Side by Side Diff: chrome/renderer/safe_browsing/scorer_unittest.cc

Issue 7866011: Switch to the new client-side phishing model that uses Murmurhash for word hashes. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix compile problems and add another test Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/renderer/safe_browsing/scorer.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/scorer.h" 5 #include "chrome/renderer/safe_browsing/scorer.h"
6 6
7 #include "base/file_path.h" 7 #include "base/file_path.h"
8 #include "base/file_util.h" 8 #include "base/file_util.h"
9 #include "base/format_macros.h" 9 #include "base/format_macros.h"
10 #include "base/hash_tables.h" 10 #include "base/hash_tables.h"
(...skipping 13 matching lines...) Expand all
24 virtual void SetUp() { 24 virtual void SetUp() {
25 // Set up a simple model. Note that the scorer does not care about 25 // Set up a simple model. Note that the scorer does not care about
26 // how features are encoded so we use readable strings here to make 26 // how features are encoded so we use readable strings here to make
27 // the test simpler to follow. 27 // the test simpler to follow.
28 model_.Clear(); 28 model_.Clear();
29 model_.add_hashes("feature1"); 29 model_.add_hashes("feature1");
30 model_.add_hashes("feature2"); 30 model_.add_hashes("feature2");
31 model_.add_hashes("feature3"); 31 model_.add_hashes("feature3");
32 model_.add_hashes("token one"); 32 model_.add_hashes("token one");
33 model_.add_hashes("token two"); 33 model_.add_hashes("token two");
34 model_.add_hashes("token");
35 model_.add_hashes("one");
36 model_.add_hashes("two");
37 34
38 ClientSideModel::Rule* rule; 35 ClientSideModel::Rule* rule;
39 rule = model_.add_rule(); 36 rule = model_.add_rule();
40 rule->set_weight(0.5); 37 rule->set_weight(0.5);
41 38
42 rule = model_.add_rule(); 39 rule = model_.add_rule();
43 rule->add_feature(0); // feature1 40 rule->add_feature(0); // feature1
44 rule->set_weight(2.0); 41 rule->set_weight(2.0);
45 42
46 rule = model_.add_rule(); 43 rule = model_.add_rule();
47 rule->add_feature(0); // feature1 44 rule->add_feature(0); // feature1
48 rule->add_feature(1); // feature2 45 rule->add_feature(1); // feature2
49 rule->set_weight(3.0); 46 rule->set_weight(3.0);
50 47
51 model_.add_page_term(3); // token one 48 model_.add_page_term(3); // token one
52 model_.add_page_term(4); // token two 49 model_.add_page_term(4); // token two
53 50
54 model_.add_page_word(5); // token 51 // These will be murmur3 hashes, but for this test it's not necessary
55 model_.add_page_word(6); // one 52 // that the hashes correspond to actual words.
56 model_.add_page_word(7); // two 53 model_.add_page_word(1000U);
54 model_.add_page_word(2000U);
55 model_.add_page_word(3000U);
57 56
58 model_.set_max_words_per_term(2); 57 model_.set_max_words_per_term(2);
58 model_.set_murmur_hash_seed(12345U);
59 } 59 }
60 60
61 ClientSideModel model_; 61 ClientSideModel model_;
62 }; 62 };
63 63
64 TEST_F(PhishingScorerTest, HasValidModel) { 64 TEST_F(PhishingScorerTest, HasValidModel) {
65 scoped_ptr<Scorer> scorer; 65 scoped_ptr<Scorer> scorer;
66 scorer.reset(Scorer::Create(model_.SerializeAsString())); 66 scorer.reset(Scorer::Create(model_.SerializeAsString()));
67 EXPECT_TRUE(scorer.get() != NULL); 67 EXPECT_TRUE(scorer.get() != NULL);
68 68
(...skipping 13 matching lines...) Expand all
82 base::hash_set<std::string> expected_page_terms; 82 base::hash_set<std::string> expected_page_terms;
83 expected_page_terms.insert("token one"); 83 expected_page_terms.insert("token one");
84 expected_page_terms.insert("token two"); 84 expected_page_terms.insert("token two");
85 EXPECT_THAT(scorer->page_terms(), 85 EXPECT_THAT(scorer->page_terms(),
86 ::testing::ContainerEq(expected_page_terms)); 86 ::testing::ContainerEq(expected_page_terms));
87 } 87 }
88 88
89 TEST_F(PhishingScorerTest, PageWords) { 89 TEST_F(PhishingScorerTest, PageWords) {
90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
91 ASSERT_TRUE(scorer.get()); 91 ASSERT_TRUE(scorer.get());
92 base::hash_set<std::string> expected_page_words; 92 base::hash_set<uint32> expected_page_words;
93 expected_page_words.insert("token"); 93 expected_page_words.insert(1000U);
94 expected_page_words.insert("one"); 94 expected_page_words.insert(2000U);
95 expected_page_words.insert("two"); 95 expected_page_words.insert(3000U);
96 EXPECT_THAT(scorer->page_words(), 96 EXPECT_THAT(scorer->page_words(),
97 ::testing::ContainerEq(expected_page_words)); 97 ::testing::ContainerEq(expected_page_words));
98 EXPECT_EQ(2U, scorer->max_words_per_term());
99 EXPECT_EQ(12345U, scorer->murmurhash3_seed());
98 } 100 }
99 101
100 TEST_F(PhishingScorerTest, ComputeScore) { 102 TEST_F(PhishingScorerTest, ComputeScore) {
101 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 103 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
102 ASSERT_TRUE(scorer.get()); 104 ASSERT_TRUE(scorer.get());
103 105
104 // An empty feature map should match the empty rule. 106 // An empty feature map should match the empty rule.
105 FeatureMap features; 107 FeatureMap features;
106 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) 108 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
107 // => 0.62245933120185459 109 // => 0.62245933120185459
108 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); 110 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
109 // Same if the feature does not match any rule. 111 // Same if the feature does not match any rule.
110 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); 112 EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));
111 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); 113 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
112 114
113 // Feature 1 matches which means that the logodds will be: 115 // Feature 1 matches which means that the logodds will be:
114 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 116 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
115 // => p = 0.6899744811276125 117 // => p = 0.6899744811276125
116 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); 118 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));
117 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); 119 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));
118 120
119 // Now, both feature 1 and feature 2 match. Expected logodds: 121 // Now, both feature 1 and feature 2 match. Expected logodds:
120 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + 122 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
121 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25 123 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25
122 // => p = 0.77729986117469119 124 // => p = 0.77729986117469119
123 EXPECT_TRUE(features.AddBooleanFeature("feature2")); 125 EXPECT_TRUE(features.AddBooleanFeature("feature2"));
124 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); 126 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
125 } 127 }
126 } // namespace safe_browsing 128 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/scorer.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698