Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(655)

Side by Side Diff: chrome/renderer/safe_browsing/scorer.cc

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/scorer.h"
6
7 #include <math.h>
8
9 #include <memory>
10
11 #include "base/logging.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_piece.h"
14 #include "chrome/common/safe_browsing/client_model.pb.h"
15 #include "chrome/renderer/safe_browsing/features.h"
16
17 namespace {
// Outcome of building a Scorer, reported through RecordScorerCreationStatus().
// The numeric value of each entry is written out so that the bucket each
// status maps to is visible at a glance; entries marked "Not used anymore"
// keep their slot so the remaining values do not shift.
enum ScorerCreationStatus {
  SCORER_SUCCESS = 0,
  SCORER_FAIL_MODEL_OPEN_FAIL = 1,       // Not used anymore
  SCORER_FAIL_MODEL_FILE_EMPTY = 2,      // Not used anymore
  SCORER_FAIL_MODEL_FILE_TOO_LARGE = 3,  // Not used anymore
  SCORER_FAIL_MODEL_PARSE_ERROR = 4,
  SCORER_FAIL_MODEL_MISSING_FIELDS = 5,
  // Always add new values before this one. Left implicit so that it keeps
  // tracking "one past the last real value".
  SCORER_STATUS_MAX
};
28
29 void RecordScorerCreationStatus(ScorerCreationStatus status) {
30 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.ScorerCreationStatus",
31 status,
32 SCORER_STATUS_MAX);
33 }
34 } // namespace
35
36 namespace safe_browsing {
37
// Maps a log-odds value onto a probability in [0.0, 1.0] using
// odds / (odds + 1) with odds = exp(log_odds).
static double LogOdds2Prob(double log_odds) {
  // floor(1023 * ln(2)) == 709, so exp() of anything below this bound is
  // still a finite double. Very negative inputs need no special casing: the
  // odds simply underflow to 0 and the result is 0. Large positive inputs are
  // clamped here because an infinite odds value would turn odds / (odds + 1)
  // into NaN.
  static const double kSaturationLogOdds = 709.0;
  if (log_odds >= kSaturationLogOdds)
    return 1.0;

  const double odds = exp(log_odds);
  return odds / (odds + 1.0);
}
50
51 Scorer::Scorer() {}
52 Scorer::~Scorer() {}
53
54 /* static */
55 Scorer* Scorer::Create(const base::StringPiece& model_str) {
56 std::unique_ptr<Scorer> scorer(new Scorer());
57 ClientSideModel& model = scorer->model_;
58 if (!model.ParseFromArray(model_str.data(), model_str.size())) {
59 DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is "
60 << "invalid.";
61 RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR);
62 return NULL;
63 } else if (!model.IsInitialized()) {
64 DLOG(ERROR) << "Unable to parse phishing model. The model is missing "
65 << "some required fields. Maybe the .proto file changed?";
66 RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS);
67 return NULL;
68 }
69 RecordScorerCreationStatus(SCORER_SUCCESS);
70 for (int i = 0; i < model.page_term_size(); ++i) {
71 scorer->page_terms_.insert(model.hashes(model.page_term(i)));
72 }
73 for (int i = 0; i < model.page_word_size(); ++i) {
74 scorer->page_words_.insert(model.page_word(i));
75 }
76 return scorer.release();
77 }
78
79 double Scorer::ComputeScore(const FeatureMap& features) const {
80 double logodds = 0.0;
81 for (int i = 0; i < model_.rule_size(); ++i) {
82 logodds += ComputeRuleScore(model_.rule(i), features);
83 }
84 return LogOdds2Prob(logodds);
85 }
86
87 int Scorer::model_version() const {
88 return model_.version();
89 }
90
91 const base::hash_set<std::string>& Scorer::page_terms() const {
92 return page_terms_;
93 }
94
95 const base::hash_set<uint32_t>& Scorer::page_words() const {
96 return page_words_;
97 }
98
99 size_t Scorer::max_words_per_term() const {
100 return model_.max_words_per_term();
101 }
102
103 uint32_t Scorer::murmurhash3_seed() const {
104 return model_.murmur_hash_seed();
105 }
106
107 size_t Scorer::max_shingles_per_page() const {
108 return model_.max_shingles_per_page();
109 }
110
111 size_t Scorer::shingle_size() const {
112 return model_.shingle_size();
113 }
114
115 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule,
116 const FeatureMap& features) const {
117 const base::hash_map<std::string, double>& feature_map = features.features();
118 double rule_score = 1.0;
119 for (int i = 0; i < rule.feature_size(); ++i) {
120 base::hash_map<std::string, double>::const_iterator it = feature_map.find(
121 model_.hashes(rule.feature(i)));
122 if (it == feature_map.end() || it->second == 0.0) {
123 // If the feature of the rule does not exist in the given feature map the
124 // feature weight is considered to be zero. If the feature weight is zero
125 // we leave early since we know that the rule score will be zero.
126 return 0.0;
127 }
128 rule_score *= it->second;
129 }
130 return rule_score * rule.weight();
131 }
132 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/scorer.h ('k') | chrome/renderer/safe_browsing/scorer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698