Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(586)

Side by Side Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Removing unnecessary static_casts Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/user_classifier.h" 5 #include "components/ntp_snippets/user_classifier.h"
6 6
7 #include <float.h> 7 #include <float.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <string> 10 #include <string>
11 11
12 #include "base/metrics/histogram_macros.h" 12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "components/ntp_snippets/pref_names.h" 14 #include "components/ntp_snippets/pref_names.h"
15 #include "components/prefs/pref_registry_simple.h" 15 #include "components/prefs/pref_registry_simple.h"
16 #include "components/prefs/pref_service.h" 16 #include "components/prefs/pref_service.h"
17 17
18 namespace ntp_snippets {
19
18 namespace { 20 namespace {
19 21
20 // TODO(jkrcal): Make all of this configurable via variations_service. 22 // TODO(jkrcal): Make all of this configurable via variations_service.
21 23
22 // The discount factor for computing the discounted-average metrics. Must be 24 // The discount factor for computing the discounted-average metrics. Must be
23 // strictly larger than 0 and strictly smaller than 1! 25 // strictly larger than 0 and strictly smaller than 1!
24 const double kDiscountFactorPerDay = 0.25; 26 const double kDiscountFactorPerDay = 0.25;
25 27
26 // Never consider any larger interval than this (so that extreme situations such 28 // Never consider any larger interval than this (so that extreme situations such
27 // as losing your phone or going for a long offline vacation do not skew the 29 // as losing your phone or going for a long offline vacation do not skew the
28 // average too much). 30 // average too much).
29 const double kMaxHours = 7 * 24; 31 const double kMaxHours = 7 * 24;
30 32
31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is 33 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
32 // the length of the browsing session where subsequent events of the same type 34 // the length of the browsing session where subsequent events of the same type
33 // do not count again). 35 // do not count again).
34 const double kMinHours = 0.5; 36 const double kMinHours = 0.5;
35 37
38 // Classification constants.
39 const double kFrequentUserScrollsAtLeastOncePerHours = 24;
40 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
41
36 const char kHistogramAverageHoursToOpenNTP[] = 42 const char kHistogramAverageHoursToOpenNTP[] =
37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; 43 "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
38 const char kHistogramAverageHoursToShowSuggestions[] = 44 const char kHistogramAverageHoursToShowSuggestions[] =
39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; 45 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
40 const char kHistogramAverageHoursToUseSuggestions[] = 46 const char kHistogramAverageHoursToUseSuggestions[] =
41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; 47 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
42 48
43 } // namespace 49 // The enum used for iteration.
44 50 const UserClassifier::Metric kMetrics[] = {
45 namespace ntp_snippets { 51 UserClassifier::Metric::NTP_OPENED,
46 52 UserClassifier::Metric::SUGGESTIONS_SHOWN,
47 UserClassifier::UserClassifier(PrefService* pref_service) 53 UserClassifier::Metric::SUGGESTIONS_USED};
48 : pref_service_(pref_service), 54
49 // Compute discount_rate_per_hour such that 55 // The summary of the prefs.
50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. 56 const char* kMetricKeys[] = {
51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} 57 prefs::kUserClassifierAverageNTPOpenedPerHour,
52 58 prefs::kUserClassifierAverageSuggestionsShownPerHour,
53 UserClassifier::~UserClassifier() {} 59 prefs::kUserClassifierAverageSuggestionsUsedPerHour};
54 60 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
55 // static 61 prefs::kUserClassifierLastTimeToShowSuggestions,
56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { 62 prefs::kUserClassifierLastTimeToUseSuggestions};
57 registry->RegisterDoublePref( 63
58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); 64 // Default lengths of the intervals for new users for the metrics.
59 registry->RegisterDoublePref( 65 const double kDefaults[] = {24, 36, 48};
60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); 66
61 registry->RegisterDoublePref( 67 static_assert(arraysize(kMetrics) ==
62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); 68 static_cast<int>(UserClassifier::Metric::COUNT) &&
Bernhard Bauer 2016/09/20 15:59:54 Maybe split this up into separate asserts?
jkrcal 2016/09/21 08:58:46 I think these checks conceptually belong together,
63 69 arraysize(kMetricKeys) ==
64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); 70 static_cast<int>(UserClassifier::Metric::COUNT) &&
65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, 71 arraysize(kLastTimeKeys) ==
66 0); 72 static_cast<int>(UserClassifier::Metric::COUNT) &&
67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, 73 arraysize(kDefaults) ==
68 0); 74 static_cast<int>(UserClassifier::Metric::COUNT),
69 } 75 "Fill in info for all metrics.");
70 76
71 void UserClassifier::OnNTPOpened() { 77 // Computes the discount rate.
72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, 78 double GetDiscountRatePerHour() {
73 prefs::kUserClassifierLastTimeToOpenNTP); 79 // Compute discount_rate_per_hour such that
74 80 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
75 double avg = GetEstimateHoursBetweenEvents( 81 return std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
76 prefs::kUserClassifierAverageNTPOpenedPerHour); 82 }
77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, 83
78 kMaxHours, 50); 84 // Returns the new value of the metric using its |old_value|, assuming
79 } 85 // |hours_since_last_time| hours have passed since it was last recomputed.
80 86 // If |event_now| is true, the event is assumed to have happened right now,
81 void UserClassifier::OnSuggestionsShown() { 87 // otherwise no event is assumed to happen within the last
82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, 88 // |hours_since_last_time| hours.
83 prefs::kUserClassifierLastTimeToShowSuggestions); 89 double RecomputeMetric(double old_value,
84 90 double hours_since_last_time,
85 double avg = GetEstimateHoursBetweenEvents( 91 double discount_rate_per_hour,
86 prefs::kUserClassifierAverageSuggestionsShownPerHour); 92 bool event_now) {
87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
88 kMaxHours, 50);
89 }
90
91 void UserClassifier::OnSuggestionsUsed() {
92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
93 prefs::kUserClassifierLastTimeToUseSuggestions);
94
95 double avg = GetEstimateHoursBetweenEvents(
96 prefs::kUserClassifierAverageSuggestionsUsedPerHour);
97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
98 kMaxHours, 50);
99 }
100
101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
102 const char* last_time_pref_name) {
103 if (!pref_service_)
104 return;
105
106 double hours_since_last_time =
107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
108 // Ignore events within the same "browsing session".
109 if (hours_since_last_time < kMinHours)
110 return;
111 SetLastTimeToNow(last_time_pref_name);
112
113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
114 // Compute and store the new discounted average according to the formula 93 // Compute and store the new discounted average according to the formula
115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. 94 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
116 double new_avg_events_per_hour = 95 return (event_now ? 1 : 0) +
Bernhard Bauer 2016/09/20 15:59:54 It might be a bit simpler to have the caller add t
jkrcal 2016/09/21 08:58:46 Done.
117 1 + 96 std::exp(-discount_rate_per_hour * hours_since_last_time) *
118 std::exp(-discount_rate_per_hour_ * hours_since_last_time) * 97 old_value;
119 avg_events_per_hour; 98 }
120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); 99
121 } 100 // Compute the number of hours between two events for the given metric value
122 101 // assuming the events were equally distributed.
123 double UserClassifier::GetEstimateHoursBetweenEvents( 102 double GetEstimateHoursBetweenEvents(double metric_value,
124 const char* metric_pref_name) { 103 double discount_rate_per_hour) {
125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); 104 // The computation below is well-defined only for |metric_value| > 1 (log of
126 105 // negative value or division by zero). When |metric_value| -> 1, the estimate
127 // Right after the first update, the metric is equal to 1. 106 // below -> infinity, so kMaxHours is a natural result, here.
128 if (avg_events_per_hour <= 1) 107 if (metric_value <= 1)
129 return kMaxHours; 108 return kMaxHours;
130 109
131 // This is the estimate with the assumption that last event happened right 110 // This is the estimate with the assumption that last event happened right
132 // now and the system is in the steady-state. Solve estimate_hours in the 111 // now and the system is in the steady-state. Solve estimate_hours in the
133 // steady-state equation: 112 // steady-state equation:
134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, 113 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
135 // i.e. 114 // i.e.
136 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), 115 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
137 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), 116 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
138 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. 117 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
139 return std::min(kMaxHours, 118 double estimate_hours =
140 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / 119 std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour;
141 discount_rate_per_hour_); 120 return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
142 } 121 }
143 122
144 double UserClassifier::GetHoursSinceLastTime( 123 // The inverse of GetEstimateHoursBetweenEvents().
145 const char* last_time_pref_name) { 124 double GetMetricValueForEstimateHoursBetweenEvents(
146 if (!pref_service_->HasPrefPath(last_time_pref_name)) 125 double estimate_hours,
147 return DBL_MAX; 126 double discount_rate_per_hour) {
127 // Keep the input value within [kMinHours, kMaxHours].
128 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
129
130 // Return |metric_value| such that GetEstimateHoursBetweenEvents for
131 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
132 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
133 // i.e.
134 // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
135 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
136 return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
137 }
138
139 } // namespace
140
141 UserClassifier::UserClassifier(PrefService* pref_service)
142 : pref_service_(pref_service),
143 discount_rate_per_hour_(GetDiscountRatePerHour()) {
144 // The pref_service_ can be null in tests.
145 if (!pref_service_)
146 return;
147
148 // Initialize the prefs storing the last time: the counter has just started!
149 for (const Metric metric : kMetrics) {
150 if (!HasLastTime(metric))
151 SetLastTimeToNow(metric);
152 }
153 }
154
155 UserClassifier::~UserClassifier() {}
156
157 // static
158 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
159 for (Metric metric : kMetrics) {
160 double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents(
161 kDefaults[static_cast<int>(metric)], GetDiscountRatePerHour());
162 registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)],
163 default_metric_value);
164 registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0);
165 }
166 }
167
168 void UserClassifier::OnEvent(Metric metric) {
169 DCHECK_NE(metric, Metric::COUNT);
170 double metric_value = UpdateMetricOnEvent(metric);
171
172 double avg =
173 GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_);
174 switch (metric) {
175 case Metric::NTP_OPENED:
176 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
177 kMaxHours, 50);
178 break;
179 case Metric::SUGGESTIONS_SHOWN:
180 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg,
181 1, kMaxHours, 50);
182 break;
183 case Metric::SUGGESTIONS_USED:
184 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
185 1, kMaxHours, 50);
186 break;
187 case Metric::COUNT:
188 NOTREACHED();
189 break;
190 }
191 }
192
193 double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
194 DCHECK_NE(metric, Metric::COUNT);
195 double metric_value = GetUpToDateMetricValue(metric);
196 return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_);
197 }
198
199 UserClassifier::UserClass UserClassifier::GetUserClass() const {
200 if (GetEstimatedAvgTime(Metric::NTP_OPENED) >=
201 kOccasionalUserOpensNTPAtMostOncePerHours) {
202 return UserClass::RARE_NTP_USER;
203 }
204
205 if (GetEstimatedAvgTime(Metric::SUGGESTIONS_SHOWN) <=
206 kFrequentUserScrollsAtLeastOncePerHours) {
207 return UserClass::ACTIVE_SUGGESTIONS_CONSUMER;
208 }
209
210 return UserClass::ACTIVE_NTP_USER;
211 }
212
213 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
214 switch (GetUserClass()) {
215 case UserClass::RARE_NTP_USER:
216 return "Rare user of the NTP";
217 case UserClass::ACTIVE_NTP_USER:
218 return "Active user of the NTP";
219 case UserClass::ACTIVE_SUGGESTIONS_CONSUMER:
220 return "Active consumer of NTP suggestions";
221 }
222 NOTREACHED();
223 return "Unknown user class";
Bernhard Bauer 2016/09/20 15:59:54 Just return a std::string, as this is only there t
jkrcal 2016/09/21 08:58:46 Done.
224 }
225
226 void UserClassifier::ClearClassificationForDebugging() {
227 // The pref_service_ can be null in tests.
228 if (!pref_service_)
229 return;
230
231 for (const Metric& metric : kMetrics) {
232 ClearMetricValue(metric);
233 SetLastTimeToNow(metric);
234 }
235 }
236
237 double UserClassifier::UpdateMetricOnEvent(Metric metric) {
238 // The pref_service_ can be null in tests.
239 if (!pref_service_)
240 return 0;
241
242 double hours_since_last_time =
243 std::min(kMaxHours, GetHoursSinceLastTime(metric));
244 // Ignore events within the same "browsing session".
245 if (hours_since_last_time < kMinHours)
246 return GetUpToDateMetricValue(metric);
247
248 SetLastTimeToNow(metric);
249
250 double metric_value = GetMetricValue(metric);
251 double new_metric_value =
252 RecomputeMetric(metric_value, hours_since_last_time,
253 discount_rate_per_hour_, /*event_now=*/true);
254 SetMetricValue(metric, new_metric_value);
255 return new_metric_value;
256 }
257
258 double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
259 // The pref_service_ can be null in tests.
260 if (!pref_service_)
261 return 0;
262
263 double hours_since_last_time =
264 std::min(kMaxHours, GetHoursSinceLastTime(metric));
265
266 double metric_value = GetMetricValue(metric);
267 return RecomputeMetric(metric_value, hours_since_last_time,
268 discount_rate_per_hour_, /*event_now=*/false);
269 }
270
271 double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
272 if (!HasLastTime(metric))
273 return 0;
148 274
149 base::TimeDelta since_last_time = 275 base::TimeDelta since_last_time =
150 base::Time::Now() - base::Time::FromInternalValue( 276 base::Time::Now() - base::Time::FromInternalValue(pref_service_->GetInt64(
151 pref_service_->GetInt64(last_time_pref_name)); 277 kLastTimeKeys[static_cast<int>(metric)]));
152 return since_last_time.InSecondsF() / 3600; 278 return since_last_time.InSecondsF() / 3600;
153 } 279 }
154 280
155 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { 281 bool UserClassifier::HasLastTime(Metric metric) const {
156 pref_service_->SetInt64(last_time_pref_name, 282 return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(metric)]);
283 }
284
285 void UserClassifier::SetLastTimeToNow(Metric metric) {
286 pref_service_->SetInt64(kLastTimeKeys[static_cast<int>(metric)],
157 base::Time::Now().ToInternalValue()); 287 base::Time::Now().ToInternalValue());
158 } 288 }
159 289
290 double UserClassifier::GetMetricValue(Metric metric) const {
291 return pref_service_->GetDouble(kMetricKeys[static_cast<int>(metric)]);
292 }
293
294 void UserClassifier::SetMetricValue(Metric metric, double metric_value) {
295 pref_service_->SetDouble(kMetricKeys[static_cast<int>(metric)], metric_value);
296 }
297
298 void UserClassifier::ClearMetricValue(Metric metric) {
299 pref_service_->ClearPref(kMetricKeys[static_cast<int>(metric)]);
300 }
301
160 } // namespace ntp_snippets 302 } // namespace ntp_snippets
OLDNEW
« chrome/browser/resources/snippets_internals.html ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698