Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: chrome/renderer/safe_browsing/scorer_unittest.cc

Issue 268673007: Extracting page shingle hashes for similarity detection. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address 1st round comment Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/scorer.h" 5 #include "chrome/renderer/safe_browsing/scorer.h"
6 6
7 #include "base/containers/hash_tables.h" 7 #include "base/containers/hash_tables.h"
8 #include "base/files/file_path.h" 8 #include "base/files/file_path.h"
9 #include "base/files/scoped_temp_dir.h" 9 #include "base/files/scoped_temp_dir.h"
10 #include "base/format_macros.h" 10 #include "base/format_macros.h"
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 model_.add_page_term(4); // token two 48 model_.add_page_term(4); // token two
49 49
50 // These will be murmur3 hashes, but for this test it's not necessary 50 // These will be murmur3 hashes, but for this test it's not necessary
51 // that the hashes correspond to actual words. 51 // that the hashes correspond to actual words.
52 model_.add_page_word(1000U); 52 model_.add_page_word(1000U);
53 model_.add_page_word(2000U); 53 model_.add_page_word(2000U);
54 model_.add_page_word(3000U); 54 model_.add_page_word(3000U);
55 55
56 model_.set_max_words_per_term(2); 56 model_.set_max_words_per_term(2);
57 model_.set_murmur_hash_seed(12345U); 57 model_.set_murmur_hash_seed(12345U);
58 model_.set_max_shingles_per_page(10);
59 model_.set_shingle_size(3);
58 } 60 }
59 61
60 ClientSideModel model_; 62 ClientSideModel model_;
61 }; 63 };
62 64
63 TEST_F(PhishingScorerTest, HasValidModel) { 65 TEST_F(PhishingScorerTest, HasValidModel) {
64 scoped_ptr<Scorer> scorer; 66 scoped_ptr<Scorer> scorer;
65 scorer.reset(Scorer::Create(model_.SerializeAsString())); 67 scorer.reset(Scorer::Create(model_.SerializeAsString()));
66 EXPECT_TRUE(scorer.get() != NULL); 68 EXPECT_TRUE(scorer.get() != NULL);
67 69
(...skipping 21 matching lines...) Expand all
89 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 91 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
90 ASSERT_TRUE(scorer.get()); 92 ASSERT_TRUE(scorer.get());
91 base::hash_set<uint32> expected_page_words; 93 base::hash_set<uint32> expected_page_words;
92 expected_page_words.insert(1000U); 94 expected_page_words.insert(1000U);
93 expected_page_words.insert(2000U); 95 expected_page_words.insert(2000U);
94 expected_page_words.insert(3000U); 96 expected_page_words.insert(3000U);
95 EXPECT_THAT(scorer->page_words(), 97 EXPECT_THAT(scorer->page_words(),
96 ::testing::ContainerEq(expected_page_words)); 98 ::testing::ContainerEq(expected_page_words));
97 EXPECT_EQ(2U, scorer->max_words_per_term()); 99 EXPECT_EQ(2U, scorer->max_words_per_term());
98 EXPECT_EQ(12345U, scorer->murmurhash3_seed()); 100 EXPECT_EQ(12345U, scorer->murmurhash3_seed());
101 EXPECT_EQ(10U, scorer->max_shingles_per_page());
102 EXPECT_EQ(3U, scorer->shingle_size());
99 } 103 }
100 104
101 TEST_F(PhishingScorerTest, ComputeScore) { 105 TEST_F(PhishingScorerTest, ComputeScore) {
102 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 106 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
103 ASSERT_TRUE(scorer.get()); 107 ASSERT_TRUE(scorer.get());
104 108
105 // An empty feature map should match the empty rule. 109 // An empty feature map should match the empty rule.
106 FeatureMap features; 110 FeatureMap features;
107 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) 111 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
108 // => 0.62245933120185459 112 // => 0.62245933120185459
109 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); 113 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
110 // Same if the feature does not match any rule. 114 // Same if the feature does not match any rule.
111 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); 115 EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));
112 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); 116 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
113 117
114 // Feature 1 matches which means that the logodds will be: 118 // Feature 1 matches which means that the logodds will be:
115 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 119 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
116 // => p = 0.6899744811276125 120 // => p = 0.6899744811276125
117 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); 121 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));
118 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); 122 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));
119 123
120 // Now, both feature 1 and feature 2 match. Expected logodds: 124 // Now, both feature 1 and feature 2 match. Expected logodds:
121 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + 125 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
122 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25 126 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25
123 // => p = 0.77729986117469119 127 // => p = 0.77729986117469119
124 EXPECT_TRUE(features.AddBooleanFeature("feature2")); 128 EXPECT_TRUE(features.AddBooleanFeature("feature2"));
125 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); 129 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
126 } 130 }
127 } // namespace safe_browsing 131 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698