chrome/renderer/safe_browsing/scorer_unittest.cc - Issue 7866011: Switch to the new client-side phishing model that uses Murmurhash for word hashes.

Side by Side Diff: chrome/renderer/safe_browsing/scorer_unittest.cc

Issue 7866011: Switch to the new client-side phishing model that uses Murmurhash for word hashes. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix compile problems and add another test Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/renderer/safe_browsing/scorer.h"	5 #include "chrome/renderer/safe_browsing/scorer.h"

6	6

7 #include "base/file_path.h"	7 #include "base/file_path.h"

8 #include "base/file_util.h"	8 #include "base/file_util.h"

9 #include "base/format_macros.h"	9 #include "base/format_macros.h"

10 #include "base/hash_tables.h"	10 #include "base/hash_tables.h"

(...skipping 13 matching lines...) Expand all Loading...
24 virtual void SetUp() {	24 virtual void SetUp() {

25 // Set up a simple model. Note that the scorer does not care about	25 // Set up a simple model. Note that the scorer does not care about

26 // how features are encoded so we use readable strings here to make	26 // how features are encoded so we use readable strings here to make

27 // the test simpler to follow.	27 // the test simpler to follow.

28 model_.Clear();	28 model_.Clear();

29 model_.add_hashes("feature1");	29 model_.add_hashes("feature1");

30 model_.add_hashes("feature2");	30 model_.add_hashes("feature2");

31 model_.add_hashes("feature3");	31 model_.add_hashes("feature3");

32 model_.add_hashes("token one");	32 model_.add_hashes("token one");

33 model_.add_hashes("token two");	33 model_.add_hashes("token two");

34 model_.add_hashes("token");

35 model_.add_hashes("one");

36 model_.add_hashes("two");

37	34

38 ClientSideModel::Rule* rule;	35 ClientSideModel::Rule* rule;

39 rule = model_.add_rule();	36 rule = model_.add_rule();

40 rule->set_weight(0.5);	37 rule->set_weight(0.5);

41	38

42 rule = model_.add_rule();	39 rule = model_.add_rule();

43 rule->add_feature(0); // feature1	40 rule->add_feature(0); // feature1

44 rule->set_weight(2.0);	41 rule->set_weight(2.0);

45	42

46 rule = model_.add_rule();	43 rule = model_.add_rule();

47 rule->add_feature(0); // feature1	44 rule->add_feature(0); // feature1

48 rule->add_feature(1); // feature2	45 rule->add_feature(1); // feature2

49 rule->set_weight(3.0);	46 rule->set_weight(3.0);

50	47

51 model_.add_page_term(3); // token one	48 model_.add_page_term(3); // token one

52 model_.add_page_term(4); // token two	49 model_.add_page_term(4); // token two

53	50

54 model_.add_page_word(5); // token	51 // These will be murmur3 hashes, but for this test it's not necessary

55 model_.add_page_word(6); // one	52 // that the hashes correspond to actual words.

56 model_.add_page_word(7); // two	53 model_.add_page_word(1000U);

	54 model_.add_page_word(2000U);

	55 model_.add_page_word(3000U);

57	56

58 model_.set_max_words_per_term(2);	57 model_.set_max_words_per_term(2);

	58 model_.set_murmur_hash_seed(12345U);

59 }	59 }

60	60

61 ClientSideModel model_;	61 ClientSideModel model_;

62 };	62 };

63	63

64 TEST_F(PhishingScorerTest, HasValidModel) {	64 TEST_F(PhishingScorerTest, HasValidModel) {

65 scoped_ptr<Scorer> scorer;	65 scoped_ptr<Scorer> scorer;

66 scorer.reset(Scorer::Create(model_.SerializeAsString()));	66 scorer.reset(Scorer::Create(model_.SerializeAsString()));

67 EXPECT_TRUE(scorer.get() != NULL);	67 EXPECT_TRUE(scorer.get() != NULL);

68	68

(...skipping 13 matching lines...) Expand all Loading...
82 base::hash_set<std::string> expected_page_terms;	82 base::hash_set<std::string> expected_page_terms;

83 expected_page_terms.insert("token one");	83 expected_page_terms.insert("token one");

84 expected_page_terms.insert("token two");	84 expected_page_terms.insert("token two");

85 EXPECT_THAT(scorer->page_terms(),	85 EXPECT_THAT(scorer->page_terms(),

86 ::testing::ContainerEq(expected_page_terms));	86 ::testing::ContainerEq(expected_page_terms));

87 }	87 }

88	88

89 TEST_F(PhishingScorerTest, PageWords) {	89 TEST_F(PhishingScorerTest, PageWords) {

90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));	90 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));

91 ASSERT_TRUE(scorer.get());	91 ASSERT_TRUE(scorer.get());

92 base::hash_set<std::string> expected_page_words;	92 base::hash_set<uint32> expected_page_words;

93 expected_page_words.insert("token");	93 expected_page_words.insert(1000U);

94 expected_page_words.insert("one");	94 expected_page_words.insert(2000U);

95 expected_page_words.insert("two");	95 expected_page_words.insert(3000U);

96 EXPECT_THAT(scorer->page_words(),	96 EXPECT_THAT(scorer->page_words(),

97 ::testing::ContainerEq(expected_page_words));	97 ::testing::ContainerEq(expected_page_words));

	98 EXPECT_EQ(2U, scorer->max_words_per_term());

	99 EXPECT_EQ(12345U, scorer->murmurhash3_seed());

98 }	100 }

99	101

100 TEST_F(PhishingScorerTest, ComputeScore) {	102 TEST_F(PhishingScorerTest, ComputeScore) {

101 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));	103 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));

102 ASSERT_TRUE(scorer.get());	104 ASSERT_TRUE(scorer.get());

103	105

104 // An empty feature map should match the empty rule.	106 // An empty feature map should match the empty rule.

105 FeatureMap features;	107 FeatureMap features;

106 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)	108 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)

107 // => 0.62245933120185459	109 // => 0.62245933120185459

108 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));	110 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));

109 // Same if the feature does not match any rule.	111 // Same if the feature does not match any rule.

110 EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));	112 EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));

111 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));	113 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));

112	114

113 // Feature 1 matches which means that the logodds will be:	115 // Feature 1 matches which means that the logodds will be:

114 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8	116 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8

115 // => p = 0.6899744811276125	117 // => p = 0.6899744811276125

116 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));	118 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));

117 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));	119 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));

118	120

119 // Now, both feature 1 and feature 2 match. Expected logodds:	121 // Now, both feature 1 and feature 2 match. Expected logodds:

120 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +	122 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +

121 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25	123 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight) = 1.25

122 // => p = 0.77729986117469119	124 // => p = 0.77729986117469119

123 EXPECT_TRUE(features.AddBooleanFeature("feature2"));	125 EXPECT_TRUE(features.AddBooleanFeature("feature2"));

124 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));	126 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));

125 }	127 }

126 } // namespace safe_browsing	128 } // namespace safe_browsing

OLD	NEW

« no previous file with comments | « chrome/renderer/safe_browsing/scorer.cc ('k') | no next file » | no next file with comments »