Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(209)

Side by Side Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Minor rebase & larger refactoring (created 4 years, 3 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/user_classifier.h" 5 #include "components/ntp_snippets/user_classifier.h"
6 6
7 #include <float.h> 7 #include <float.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <string> 10 #include <string>
11 11
12 #include "base/metrics/histogram_macros.h" 12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "components/ntp_snippets/pref_names.h" 14 #include "components/ntp_snippets/pref_names.h"
15 #include "components/prefs/pref_registry_simple.h" 15 #include "components/prefs/pref_registry_simple.h"
16 #include "components/prefs/pref_service.h" 16 #include "components/prefs/pref_service.h"
17 17
18 namespace ntp_snippets {
19
18 namespace { 20 namespace {
19 21
20 // TODO(jkrcal): Make all of this configurable via variations_service. 22 // TODO(jkrcal): Make all of this configurable via variations_service.
21 23
22 // The discount factor for computing the discounted-average metrics. Must be 24 // The discount factor for computing the discounted-average metrics. Must be
23 // strictly larger than 0 and strictly smaller than 1! 25 // strictly larger than 0 and strictly smaller than 1!
24 const double kDiscountFactorPerDay = 0.25; 26 const double kDiscountFactorPerDay = 0.25;
25 27
26 // Never consider any larger interval than this (so that extreme situations such 28 // Never consider any larger interval than this (so that extreme situations such
27 // as losing your phone or going for a long offline vacation do not skew the 29 // as losing your phone or going for a long offline vacation do not skew the
28 // average too much). 30 // average too much).
29 const double kMaxHours = 7 * 24; 31 const double kMaxHours = 7 * 24;
30 32
31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is 33 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
32 // the length of the browsing session where subsequent events of the same type 34 // the length of the browsing session where subsequent events of the same type
33 // do not count again). 35 // do not count again).
34 const double kMinHours = 0.5; 36 const double kMinHours = 0.5;
35 37
38 // Classification constants.
39 const double kFrequentUserScrollsAtLeastOncePerHours = 24;
40 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
41
36 const char kHistogramAverageHoursToOpenNTP[] = 42 const char kHistogramAverageHoursToOpenNTP[] =
37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; 43 "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
38 const char kHistogramAverageHoursToShowSuggestions[] = 44 const char kHistogramAverageHoursToShowSuggestions[] =
39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; 45 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
40 const char kHistogramAverageHoursToUseSuggestions[] = 46 const char kHistogramAverageHoursToUseSuggestions[] =
41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; 47 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
42 48
43 } // namespace 49 // The summary of the prefs.
44 50 const char* kMetricKeys[] = {
45 namespace ntp_snippets { 51 prefs::kUserClassifierAverageNTPOpenedPerHour,
46 52 prefs::kUserClassifierAverageSuggestionsShownPerHour,
47 UserClassifier::UserClassifier(PrefService* pref_service) 53 prefs::kUserClassifierAverageSuggestionsUsedPerHour};
48 : pref_service_(pref_service), 54 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
49 // Compute discount_rate_per_hour such that 55 prefs::kUserClassifierLastTimeToShowSuggestions,
50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. 56 prefs::kUserClassifierLastTimeToUseSuggestions};
51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} 57
52 58 // Default lengths of the intervals for new users for the metrics.
53 UserClassifier::~UserClassifier() {} 59 const double kDefaults[] = {24, 36, 48};
Marc Treib 2016/09/20 10:27:03 optional: add some static_asserts to make sure the
jkrcal 2016/09/20 13:10:13 Done.
54 60
55 // static 61 // Computes the discount rate.
56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { 62 double
57 registry->RegisterDoublePref( 63 GetDiscountRatePerHour() {
Marc Treib 2016/09/20 10:27:03 extra line break
jkrcal 2016/09/20 13:10:13 Done.
58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); 64 static double discount_rate_per_hour = 0.0;
59 registry->RegisterDoublePref( 65
60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); 66 if (discount_rate_per_hour == 0.0) {
tschumann 2016/09/20 11:32:47 what's the purpose of this? Do you want to initia
jkrcal 2016/09/20 13:10:13 It was initialization, I removed the static var, a
61 registry->RegisterDoublePref( 67 // Compute discount_rate_per_hour such that
62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); 68 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
63 69 discount_rate_per_hour =
64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); 70 std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, 71 }
66 0); 72
67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, 73 return discount_rate_per_hour;
68 0); 74 }
69 } 75
70 76 // Returns the new value of the metric using its |old_value|, assuming
71 void UserClassifier::OnNTPOpened() { 77 // |hours_since_last_time| hours have passed since it was last recomputed.
72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, 78 // If |event_now| is true, the event is assumed to have happened right now,
73 prefs::kUserClassifierLastTimeToOpenNTP); 79 // otherwise no event is assumed to happen within the last
74 80 // |hours_since_last_time| hours.
75 double avg = GetEstimateHoursBetweenEvents( 81 double RecomputeMetric(double old_value,
76 prefs::kUserClassifierAverageNTPOpenedPerHour); 82 double hours_since_last_time,
77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, 83 bool event_now) {
78 kMaxHours, 50);
79 }
80
81 void UserClassifier::OnSuggestionsShown() {
82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
83 prefs::kUserClassifierLastTimeToShowSuggestions);
84
85 double avg = GetEstimateHoursBetweenEvents(
86 prefs::kUserClassifierAverageSuggestionsShownPerHour);
87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
88 kMaxHours, 50);
89 }
90
91 void UserClassifier::OnSuggestionsUsed() {
92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
93 prefs::kUserClassifierLastTimeToUseSuggestions);
94
95 double avg = GetEstimateHoursBetweenEvents(
96 prefs::kUserClassifierAverageSuggestionsUsedPerHour);
97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
98 kMaxHours, 50);
99 }
100
101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
102 const char* last_time_pref_name) {
103 if (!pref_service_)
104 return;
105
106 double hours_since_last_time =
107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
108 // Ignore events within the same "browsing session".
109 if (hours_since_last_time < kMinHours)
110 return;
111 SetLastTimeToNow(last_time_pref_name);
112
113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
114 // Compute and store the new discounted average according to the formula 84 // Compute and store the new discounted average according to the formula
115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. 85 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
116 double new_avg_events_per_hour = 86 return (event_now ? 1 : 0) +
117 1 + 87 std::exp(-GetDiscountRatePerHour() * hours_since_last_time) *
118 std::exp(-discount_rate_per_hour_ * hours_since_last_time) * 88 old_value;
119 avg_events_per_hour; 89 }
120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); 90
121 } 91 // Compute the number of hours between two events for the given metric value
122 92 // assuming the events were equally distributed.
123 double UserClassifier::GetEstimateHoursBetweenEvents( 93 double GetEstimateHoursBetweenEvents(const double metric_value) {
124 const char* metric_pref_name) {
125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
126
127 // Right after the first update, the metric is equal to 1. 94 // Right after the first update, the metric is equal to 1.
128 if (avg_events_per_hour <= 1) 95 if (metric_value <= 1)
tschumann 2016/09/20 11:32:47 maybe adjust the comment to also explain why this
jkrcal 2016/09/20 13:10:13 Done.
129 return kMaxHours; 96 return kMaxHours;
130 97
131 // This is the estimate with the assumption that last event happened right 98 // This is the estimate with the assumption that last event happened right
132 // now and the system is in the steady-state. Solve estimate_hours in the 99 // now and the system is in the steady-state. Solve estimate_hours in the
133 // steady-state equation: 100 // steady-state equation:
134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, 101 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
135 // i.e. 102 // i.e.
136 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), 103 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
137 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), 104 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
138 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. 105 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
139 return std::min(kMaxHours, 106 double estimate_hours =
140 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / 107 std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour();
141 discount_rate_per_hour_); 108 return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
142 } 109 }
143 110
144 double UserClassifier::GetHoursSinceLastTime( 111 // The inverse of GetEstimateHoursBetweenEvents().
145 const char* last_time_pref_name) { 112 double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) {
146 if (!pref_service_->HasPrefPath(last_time_pref_name)) 113 // Keep the input value within [kMinHours, kMaxHours].
147 return DBL_MAX; 114 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
115
116 // Return |metric_value| such that GetEstimateHoursBetweenEvents for
117 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
118 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
119 // i.e.
120 // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
121 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
122 return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours));
123 }
124
125 } // namespace
126
127 // static
128 const UserClassifier::Metric UserClassifier::kMetrics[3] = {
Marc Treib 2016/09/20 10:27:03 Is the "3" required?
jkrcal 2016/09/20 13:10:13 Done.
129 Metric::OPEN_NTP, Metric::SHOW_SUGGESTIONS, Metric::USE_SUGGESTIONS};
130
131 UserClassifier::UserClassifier(PrefService* pref_service)
132 : pref_service_(pref_service) {
133 // The pref_service_ can be null in tests.
134 if (!pref_service_)
135 return;
136
137 // Initialize the prefs storing the last time: the counter has just started!
138 for (const Metric metric : kMetrics) {
139 if (!HasLastTime(metric))
140 SetLastTimeToNow(metric);
141 }
142 }
143
144 UserClassifier::~UserClassifier() {}
145
146 // static
147 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
148 for (const Metric metric : kMetrics) {
149 registry->RegisterDoublePref(kMetricKeys[metric],
150 GetMetricValueForEstimateHoursBetweenEvents(
151 kDefaults[metric]));
152 registry->RegisterInt64Pref(kLastTimeKeys[metric], 0);
153 }
154 }
155
156 void UserClassifier::OnNTPOpened() {
157 double metric = UpdateMetricOnEvent(Metric::OPEN_NTP);
158
159 double avg = GetEstimateHoursBetweenEvents(metric);
160 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
161 kMaxHours, 50);
162 }
163
164 void UserClassifier::OnSuggestionsShown() {
165 double metric = UpdateMetricOnEvent(Metric::SHOW_SUGGESTIONS);
166
167 double avg = GetEstimateHoursBetweenEvents(metric);
168 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
169 kMaxHours, 50);
170 }
171
172 void UserClassifier::OnSuggestionsUsed() {
173 double metric = UpdateMetricOnEvent(Metric::USE_SUGGESTIONS);
174
175 double avg = GetEstimateHoursBetweenEvents(metric);
176 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
177 kMaxHours, 50);
178 }
179
180 double UserClassifier::GetEstimatedAvgTimeToOpenNTP() const {
Marc Treib 2016/09/20 10:27:03 Are the three GetEstimatedAvgTimeTo... methods sti
jkrcal 2016/09/20 13:10:13 Done.
181 return GetEstimatedAvgTime(Metric::OPEN_NTP);
182 }
183
184 double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() const {
185 return GetEstimatedAvgTime(Metric::SHOW_SUGGESTIONS);
186 }
187
188 double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() const {
189 return GetEstimatedAvgTime(Metric::USE_SUGGESTIONS);
190 }
191
192 UserClassifier::UserClass UserClassifier::GetUserClass() const {
193 if (GetEstimatedAvgTimeToOpenNTP() >=
194 kOccasionalUserOpensNTPAtMostOncePerHours) {
195 return UserClass::OCCASIONAL_NTP_USER;
196 }
197
198 if (GetEstimatedAvgTimeToUseSuggestions() <=
199 kFrequentUserScrollsAtLeastOncePerHours) {
200 return UserClass::FREQUENT_NTP_USER;
201 }
202
203 return UserClass::NORMAL_NTP_USER;
204 }
205
206 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
207 switch (GetUserClass()) {
208 case UserClass::OCCASIONAL_NTP_USER:
209 return "Occasional user of the NTP";
210 case UserClass::NORMAL_NTP_USER:
211 return "Normal user of the NTP";
212 case UserClass::FREQUENT_NTP_USER:
213 return "Frequent user of the NTP";
214 }
215 NOTREACHED();
216 return "Unknown user class";
217 }
218
219 void UserClassifier::ClearClassificationForDebugging() {
220 // The pref_service_ can be null in tests.
221 if (!pref_service_)
222 return;
223
224 for (const Metric& metric : kMetrics) {
225 ClearMetricValue(metric);
226 SetLastTimeToNow(metric);
227 }
228 }
229
230 double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
231 double metric_value = GetUpToDateMetricValue(metric);
232 return GetEstimateHoursBetweenEvents(metric_value);
233 }
234
235 double UserClassifier::UpdateMetricOnEvent(Metric metric) {
236 // The pref_service_ can be null in tests.
237 if (!pref_service_)
238 return 0;
239
240 double hours_since_last_time =
241 std::min(kMaxHours, GetHoursSinceLastTime(metric));
242 // Ignore events within the same "browsing session".
243 if (hours_since_last_time < kMinHours)
244 return GetUpToDateMetricValue(metric);
245
246 SetLastTimeToNow(metric);
247
248 double metric_value = GetMetricValue(metric);
249 double new_metric_value = RecomputeMetric(metric_value, hours_since_last_time,
250 true /* event_now */);
Marc Treib 2016/09/20 10:27:03 nit: Preferred style is /*event_now=*/true
jkrcal 2016/09/20 13:10:13 I recently had a CL discussion with Bernhard: -
Marc Treib 2016/09/20 13:26:52 Tim suggested the above format, because for intern
jkrcal 2016/09/20 13:46:39 Done.
Bernhard Bauer 2016/09/20 15:59:54 Weeeelll… If that tool needs to be written / porte
251 SetMetricValue(metric, new_metric_value);
252 return new_metric_value;
253 }
254
255 double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
256 // The pref_service_ can be null in tests.
257 if (!pref_service_)
258 return 0;
259
260 double hours_since_last_time =
261 std::min(kMaxHours, GetHoursSinceLastTime(metric));
262
263 double metric_value = GetMetricValue(metric);
264 return RecomputeMetric(metric_value, hours_since_last_time,
265 false /* event_now */);
266 }
267
268 double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
269 DCHECK(pref_service_);
270 if (!HasLastTime(metric))
271 return 0;
148 272
149 base::TimeDelta since_last_time = 273 base::TimeDelta since_last_time =
150 base::Time::Now() - base::Time::FromInternalValue( 274 base::Time::Now() - base::Time::FromInternalValue(
151 pref_service_->GetInt64(last_time_pref_name)); 275 pref_service_->GetInt64(kLastTimeKeys[metric]));
152 return since_last_time.InSecondsF() / 3600; 276 return since_last_time.InSecondsF() / 3600;
153 } 277 }
154 278
155 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { 279 bool UserClassifier::HasLastTime(const Metric metric) const {
156 pref_service_->SetInt64(last_time_pref_name, 280 DCHECK(pref_service_);
Marc Treib 2016/09/20 10:27:03 These DCHECKs aren't really helpful: If it were nu
jkrcal 2016/09/20 13:10:13 Done.
281 return pref_service_->HasPrefPath(kLastTimeKeys[metric]);
282 }
283
284 void UserClassifier::SetLastTimeToNow(Metric metric) {
285 DCHECK(pref_service_);
286 pref_service_->SetInt64(kLastTimeKeys[metric],
157 base::Time::Now().ToInternalValue()); 287 base::Time::Now().ToInternalValue());
158 } 288 }
159 289
290 double UserClassifier::GetMetricValue(const Metric metric) const {
291 DCHECK(pref_service_);
292 return pref_service_->GetDouble(kMetricKeys[metric]);
293 }
294
295 void UserClassifier::SetMetricValue(const Metric metric, double metric_value) {
296 DCHECK(pref_service_);
297 pref_service_->SetDouble(kMetricKeys[metric], metric_value);
298 }
299
300 void UserClassifier::ClearMetricValue(const Metric metric) {
301 DCHECK(pref_service_);
302 pref_service_->ClearPref(kMetricKeys[metric]);
303 }
304
160 } // namespace ntp_snippets 305 } // namespace ntp_snippets
OLD | NEW
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698