Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Comments of Tim and Marc Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/user_classifier.h" 5 #include "components/ntp_snippets/user_classifier.h"
6 6
7 #include <float.h> 7 #include <float.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <string> 10 #include <string>
11 11
12 #include "base/metrics/histogram_macros.h" 12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "components/ntp_snippets/pref_names.h" 14 #include "components/ntp_snippets/pref_names.h"
15 #include "components/prefs/pref_registry_simple.h" 15 #include "components/prefs/pref_registry_simple.h"
16 #include "components/prefs/pref_service.h" 16 #include "components/prefs/pref_service.h"
17 17
18 namespace ntp_snippets {
19
18 namespace { 20 namespace {
19 21
20 // TODO(jkrcal): Make all of this configurable via variations_service. 22 // TODO(jkrcal): Make all of this configurable via variations_service.
21 23
22 // The discount factor for computing the discounted-average metrics. Must be 24 // The discount factor for computing the discounted-average metrics. Must be
23 // strictly larger than 0 and strictly smaller than 1! 25 // strictly larger than 0 and strictly smaller than 1!
24 const double kDiscountFactorPerDay = 0.25; 26 const double kDiscountFactorPerDay = 0.25;
25 27
26 // Never consider any larger interval than this (so that extreme situations such 28 // Never consider any larger interval than this (so that extreme situations such
27 // as losing your phone or going for a long offline vacation do not skew the 29 // as losing your phone or going for a long offline vacation do not skew the
28 // average too much). 30 // average too much).
29 const double kMaxHours = 7 * 24; 31 const double kMaxHours = 7 * 24;
30 32
31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is 33 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
32 // the length of the browsing session where subsequent events of the same type 34 // the length of the browsing session where subsequent events of the same type
33 // do not count again). 35 // do not count again).
34 const double kMinHours = 0.5; 36 const double kMinHours = 0.5;
35 37
38 // Classification constants.
39 const double kFrequentUserScrollsAtLeastOncePerHours = 24;
40 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
41
36 const char kHistogramAverageHoursToOpenNTP[] = 42 const char kHistogramAverageHoursToOpenNTP[] =
37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; 43 "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
38 const char kHistogramAverageHoursToShowSuggestions[] = 44 const char kHistogramAverageHoursToShowSuggestions[] =
39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; 45 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
40 const char kHistogramAverageHoursToUseSuggestions[] = 46 const char kHistogramAverageHoursToUseSuggestions[] =
41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; 47 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
42 48
43 } // namespace 49 // The list of all metrics, used for iteration.
44 50 const UserClassifier::Metric kMetrics[] = {
45 namespace ntp_snippets { 51 UserClassifier::Metric::NTP_OPENED,
46 52 UserClassifier::Metric::SUGGESTIONS_SHOWN,
47 UserClassifier::UserClassifier(PrefService* pref_service) 53 UserClassifier::Metric::SUGGESTIONS_USED};
48 : pref_service_(pref_service), 54
49 // Compute discount_rate_per_hour such that 55 // The names of the prefs (metric value and last event time), indexed by metric.
50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. 56 const char* kMetricKeys[] = {
51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} 57 prefs::kUserClassifierAverageNTPOpenedPerHour,
52 58 prefs::kUserClassifierAverageSuggestionsShownPerHour,
53 UserClassifier::~UserClassifier() {} 59 prefs::kUserClassifierAverageSuggestionsUsedPerHour};
54 60 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
55 // static 61 prefs::kUserClassifierLastTimeToShowSuggestions,
56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { 62 prefs::kUserClassifierLastTimeToUseSuggestions};
57 registry->RegisterDoublePref( 63
58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); 64 // Default lengths of the intervals for new users for the metrics.
59 registry->RegisterDoublePref( 65 const double kDefaults[] = {24, 36, 48};
60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); 66
61 registry->RegisterDoublePref( 67 static_assert(arraysize(kMetrics) == UserClassifier::Metric::COUNT &&
62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); 68 arraysize(kMetricKeys) == UserClassifier::Metric::COUNT &&
63 69 arraysize(kLastTimeKeys) == UserClassifier::Metric::COUNT &&
64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); 70 arraysize(kDefaults) == UserClassifier::Metric::COUNT,
65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, 71 "Fill in info for all metrics.");
66 0); 72
67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, 73 // Computes the discount rate.
68 0); 74 double GetDiscountRatePerHour() {
69 } 75 // Compute discount_rate_per_hour such that
70 76 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
71 void UserClassifier::OnNTPOpened() { 77 return std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, 78 }
73 prefs::kUserClassifierLastTimeToOpenNTP); 79
74 80 // Returns the new value of the metric using its |old_value|, assuming
75 double avg = GetEstimateHoursBetweenEvents( 81 // |hours_since_last_time| hours have passed since it was last recomputed.
76 prefs::kUserClassifierAverageNTPOpenedPerHour); 82 // If |event_now| is true, the event is assumed to have happened right now,
77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, 83 // otherwise no event is assumed to happen within the last
78 kMaxHours, 50); 84 // |hours_since_last_time| hours.
79 } 85 double RecomputeMetric(double old_value,
80 86 double hours_since_last_time,
81 void UserClassifier::OnSuggestionsShown() { 87 double discount_rate_per_hour,
82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, 88 bool event_now) {
83 prefs::kUserClassifierLastTimeToShowSuggestions);
84
85 double avg = GetEstimateHoursBetweenEvents(
86 prefs::kUserClassifierAverageSuggestionsShownPerHour);
87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
88 kMaxHours, 50);
89 }
90
91 void UserClassifier::OnSuggestionsUsed() {
92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
93 prefs::kUserClassifierLastTimeToUseSuggestions);
94
95 double avg = GetEstimateHoursBetweenEvents(
96 prefs::kUserClassifierAverageSuggestionsUsedPerHour);
97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
98 kMaxHours, 50);
99 }
100
101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
102 const char* last_time_pref_name) {
103 if (!pref_service_)
104 return;
105
106 double hours_since_last_time =
107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
108 // Ignore events within the same "browsing session".
109 if (hours_since_last_time < kMinHours)
110 return;
111 SetLastTimeToNow(last_time_pref_name);
112
113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
114 // Compute and store the new discounted average according to the formula 89 // Compute and store the new discounted average according to the formula
115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. 90 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
116 double new_avg_events_per_hour = 91 return (event_now ? 1 : 0) +
117 1 + 92 std::exp(-discount_rate_per_hour * hours_since_last_time) *
118 std::exp(-discount_rate_per_hour_ * hours_since_last_time) * 93 old_value;
119 avg_events_per_hour; 94 }
120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); 95
121 } 96 // Compute the number of hours between two events for the given metric value
122 97 // assuming the events were equally distributed.
123 double UserClassifier::GetEstimateHoursBetweenEvents( 98 double GetEstimateHoursBetweenEvents(double metric_value,
124 const char* metric_pref_name) { 99 double discount_rate_per_hour) {
125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); 100 // The computation below is well-defined only for |metric_value| > 1 (log of
126 101 // negative value or division by zero). When |metric_value| -> 1, the estimate
127 // Right after the first update, the metric is equal to 1. 102 // below -> infinity, so kMaxHours is a natural result, here.
128 if (avg_events_per_hour <= 1) 103 if (metric_value <= 1)
129 return kMaxHours; 104 return kMaxHours;
130 105
131 // This is the estimate with the assumption that last event happened right 106 // This is the estimate with the assumption that last event happened right
132 // now and the system is in the steady-state. Solve estimate_hours in the 107 // now and the system is in the steady-state. Solve estimate_hours in the
133 // steady-state equation: 108 // steady-state equation:
134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, 109 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
135 // i.e. 110 // i.e.
136 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), 111 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
137 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), 112 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
138 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. 113 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
139 return std::min(kMaxHours, 114 double estimate_hours =
140 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / 115 std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour;
141 discount_rate_per_hour_); 116 return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
142 } 117 }
143 118
144 double UserClassifier::GetHoursSinceLastTime( 119 // The inverse of GetEstimateHoursBetweenEvents().
145 const char* last_time_pref_name) { 120 double GetMetricValueForEstimateHoursBetweenEvents(
146 if (!pref_service_->HasPrefPath(last_time_pref_name)) 121 double estimate_hours,
147 return DBL_MAX; 122 double discount_rate_per_hour) {
123 // Keep the input value within [kMinHours, kMaxHours].
124 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
125
126 // Return |metric_value| such that GetEstimateHoursBetweenEvents for
127 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
128 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
129 // i.e.
130 // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
131 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
132 return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
133 }
134
135 } // namespace
136
137 UserClassifier::UserClassifier(PrefService* pref_service)
138 : pref_service_(pref_service),
139 discount_rate_per_hour_(GetDiscountRatePerHour()) {
140 // The pref_service_ can be null in tests.
141 if (!pref_service_)
142 return;
143
144 // Initialize the prefs storing the last time: the counter has just started!
145 for (const Metric metric : kMetrics) {
146 if (!HasLastTime(metric))
147 SetLastTimeToNow(metric);
148 }
149 }
150
151 UserClassifier::~UserClassifier() {}
152
153 // static
154 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
155 for (Metric metric : kMetrics) {
156 registry->RegisterDoublePref(
157 kMetricKeys[metric], GetMetricValueForEstimateHoursBetweenEvents(
158 kDefaults[metric], GetDiscountRatePerHour()));
159 registry->RegisterInt64Pref(kLastTimeKeys[metric], 0);
160 }
161 }
162
163 void UserClassifier::OnEvent(Metric metric) {
164 DCHECK(0 <= metric && metric < Metric::COUNT);
165 double metric_value = UpdateMetricOnEvent(metric);
166
167 double avg =
168 GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_);
169 switch (metric) {
170 case Metric::NTP_OPENED:
171 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
172 kMaxHours, 50);
173 break;
174 case Metric::SUGGESTIONS_SHOWN:
175 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg,
176 1, kMaxHours, 50);
177 break;
178 case Metric::SUGGESTIONS_USED:
179 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
180 1, kMaxHours, 50);
181 break;
182 case Metric::COUNT:
183 break;
Marc Treib 2016/09/20 13:26:52 NOTREACHED() ?
jkrcal 2016/09/20 13:46:39 Done.
184 }
185 }
186
187 double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
188 DCHECK(0 <= metric && metric < Metric::COUNT);
Marc Treib 2016/09/20 13:26:52 IMO "DCHECK_NE(metric, Metric::COUNT)" would be enough.
jkrcal 2016/09/20 13:46:39 Done. Are enum values always non-negative?
189 double metric_value = GetUpToDateMetricValue(metric);
190 return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_);
191 }
192
193 UserClassifier::UserClass UserClassifier::GetUserClass() const {
194 if (GetEstimatedAvgTime(Metric::NTP_OPENED) >=
195 kOccasionalUserOpensNTPAtMostOncePerHours) {
196 return UserClass::RARE_NTP_USER;
197 }
198
199 if (GetEstimatedAvgTime(Metric::SUGGESTIONS_SHOWN) <=
200 kFrequentUserScrollsAtLeastOncePerHours) {
201 return UserClass::FREQUENT_NTP_USER;
202 }
203
204 return UserClass::ORDINARY_NTP_USER;
205 }
206
207 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
208 switch (GetUserClass()) {
209 case UserClass::RARE_NTP_USER:
210 return "Rare user of the NTP";
211 case UserClass::ORDINARY_NTP_USER:
212 return "Normal user of the NTP";
Marc Treib 2016/09/20 13:26:52 "Ordinary" (or whatever it becomes), to match the enum value name.
jkrcal 2016/09/20 13:46:39 Done.
213 case UserClass::FREQUENT_NTP_USER:
214 return "Frequent user of the NTP";
215 }
216 NOTREACHED();
217 return "Unknown user class";
218 }
219
220 void UserClassifier::ClearClassificationForDebugging() {
221 // The pref_service_ can be null in tests.
222 if (!pref_service_)
223 return;
224
225 for (const Metric& metric : kMetrics) {
226 ClearMetricValue(metric);
227 SetLastTimeToNow(metric);
228 }
229 }
230
231 double UserClassifier::UpdateMetricOnEvent(Metric metric) {
232 // The pref_service_ can be null in tests.
233 if (!pref_service_)
234 return 0;
235
236 double hours_since_last_time =
237 std::min(kMaxHours, GetHoursSinceLastTime(metric));
238 // Ignore events within the same "browsing session".
239 if (hours_since_last_time < kMinHours)
240 return GetUpToDateMetricValue(metric);
241
242 SetLastTimeToNow(metric);
243
244 double metric_value = GetMetricValue(metric);
245 double new_metric_value =
246 RecomputeMetric(metric_value, hours_since_last_time,
247 discount_rate_per_hour_, true /* event_now */);
248 SetMetricValue(metric, new_metric_value);
249 return new_metric_value;
250 }
251
252 double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
253 // The pref_service_ can be null in tests.
254 if (!pref_service_)
255 return 0;
256
257 double hours_since_last_time =
258 std::min(kMaxHours, GetHoursSinceLastTime(metric));
259
260 double metric_value = GetMetricValue(metric);
261 return RecomputeMetric(metric_value, hours_since_last_time,
262 discount_rate_per_hour_, false /* event_now */);
263 }
264
265 double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
266 if (!HasLastTime(metric))
267 return 0;
148 268
149 base::TimeDelta since_last_time = 269 base::TimeDelta since_last_time =
150 base::Time::Now() - base::Time::FromInternalValue( 270 base::Time::Now() - base::Time::FromInternalValue(
151 pref_service_->GetInt64(last_time_pref_name)); 271 pref_service_->GetInt64(kLastTimeKeys[metric]));
152 return since_last_time.InSecondsF() / 3600; 272 return since_last_time.InSecondsF() / 3600;
153 } 273 }
154 274
155 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { 275 bool UserClassifier::HasLastTime(const Metric metric) const {
156 pref_service_->SetInt64(last_time_pref_name, 276 return pref_service_->HasPrefPath(kLastTimeKeys[metric]);
277 }
278
279 void UserClassifier::SetLastTimeToNow(Metric metric) {
280 pref_service_->SetInt64(kLastTimeKeys[metric],
157 base::Time::Now().ToInternalValue()); 281 base::Time::Now().ToInternalValue());
158 } 282 }
159 283
284 double UserClassifier::GetMetricValue(const Metric metric) const {
285 return pref_service_->GetDouble(kMetricKeys[metric]);
286 }
287
288 void UserClassifier::SetMetricValue(const Metric metric, double metric_value) {
289 pref_service_->SetDouble(kMetricKeys[metric], metric_value);
290 }
291
292 void UserClassifier::ClearMetricValue(const Metric metric) {
293 pref_service_->ClearPref(kMetricKeys[metric]);
294 }
295
160 } // namespace ntp_snippets 296 } // namespace ntp_snippets
OLDNEW
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698