Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(322)

Side by Side Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Bernhard's comments Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/user_classifier.h" 5 #include "components/ntp_snippets/user_classifier.h"
6 6
7 #include <float.h> 7 #include <float.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <string> 10 #include <string>
11 11
12 #include "base/metrics/histogram_macros.h" 12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "components/ntp_snippets/pref_names.h" 14 #include "components/ntp_snippets/pref_names.h"
15 #include "components/prefs/pref_registry_simple.h" 15 #include "components/prefs/pref_registry_simple.h"
16 #include "components/prefs/pref_service.h" 16 #include "components/prefs/pref_service.h"
17 17
18 namespace ntp_snippets {
19
18 namespace { 20 namespace {
19 21
20 // TODO(jkrcal): Make all of this configurable via variations_service. 22 // TODO(jkrcal): Make all of this configurable via variations_service.
21 23
22 // The discount factor for computing the discounted-average metrics. Must be 24 // The discount factor for computing the discounted-average metrics. Must be
23 // strictly larger than 0 and strictly smaller than 1! 25 // strictly larger than 0 and strictly smaller than 1!
24 const double kDiscountFactorPerDay = 0.25; 26 const double kDiscountFactorPerDay = 0.25;
25 27
26 // Never consider any larger interval than this (so that extreme situations such 28 // Never consider any larger interval than this (so that extreme situations such
27 // as losing your phone or going for a long offline vacation do not skew the 29 // as losing your phone or going for a long offline vacation do not skew the
28 // average too much). 30 // average too much).
29 const double kMaxHours = 7 * 24; 31 const double kMaxHours = 7 * 24;
30 32
31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is 33 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
32 // the length of the browsing session where subsequent events of the same type 34 // the length of the browsing session where subsequent events of the same type
33 // do not count again). 35 // do not count again).
34 const double kMinHours = 0.5; 36 const double kMinHours = 0.5;
35 37
38 // Classification constants.
39 const double kFrequentUserScrollsAtLeastOncePerHours = 24;
40 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
41
36 const char kHistogramAverageHoursToOpenNTP[] = 42 const char kHistogramAverageHoursToOpenNTP[] =
37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; 43 "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
38 const char kHistogramAverageHoursToShowSuggestions[] = 44 const char kHistogramAverageHoursToShowSuggestions[] =
39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; 45 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
40 const char kHistogramAverageHoursToUseSuggestions[] = 46 const char kHistogramAverageHoursToUseSuggestions[] =
41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; 47 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
42 48
43 } // namespace 49 // The enum used for iteration.
44 50 const UserClassifier::Metric kMetrics[] = {
45 namespace ntp_snippets { 51 UserClassifier::Metric::NTP_OPENED,
46 52 UserClassifier::Metric::SUGGESTIONS_SHOWN,
47 UserClassifier::UserClassifier(PrefService* pref_service) 53 UserClassifier::Metric::SUGGESTIONS_USED};
48 : pref_service_(pref_service), 54
49 // Compute discount_rate_per_hour such that 55 // The summary of the prefs.
50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. 56 const char* kMetricKeys[] = {
51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} 57 prefs::kUserClassifierAverageNTPOpenedPerHour,
52 58 prefs::kUserClassifierAverageSuggestionsShownPerHour,
53 UserClassifier::~UserClassifier() {} 59 prefs::kUserClassifierAverageSuggestionsUsedPerHour};
54 60 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
55 // static 61 prefs::kUserClassifierLastTimeToShowSuggestions,
56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { 62 prefs::kUserClassifierLastTimeToUseSuggestions};
57 registry->RegisterDoublePref( 63
58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); 64 // Default lengths of the intervals for new users for the metrics.
59 registry->RegisterDoublePref( 65 const double kDefaults[] = {24, 36, 48};
60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); 66
61 registry->RegisterDoublePref( 67 static_assert(arraysize(kMetrics) ==
62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); 68 static_cast<int>(UserClassifier::Metric::COUNT) &&
63 69 arraysize(kMetricKeys) ==
64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); 70 static_cast<int>(UserClassifier::Metric::COUNT) &&
65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, 71 arraysize(kLastTimeKeys) ==
66 0); 72 static_cast<int>(UserClassifier::Metric::COUNT) &&
67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, 73 arraysize(kDefaults) ==
68 0); 74 static_cast<int>(UserClassifier::Metric::COUNT),
69 } 75 "Fill in info for all metrics.");
70 76
71 void UserClassifier::OnNTPOpened() { 77 // Computes the discount rate.
72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, 78 double GetDiscountRatePerHour() {
73 prefs::kUserClassifierLastTimeToOpenNTP); 79 // Compute discount_rate_per_hour such that
74 80 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
75 double avg = GetEstimateHoursBetweenEvents( 81 return std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
76 prefs::kUserClassifierAverageNTPOpenedPerHour); 82 }
77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, 83
78 kMaxHours, 50); 84 // Returns the new value of the metric using its |old_value|, assuming
79 } 85 // |hours_since_last_time| hours have passed since it was last discounted.
80 86 double DiscountMetric(double old_value,
81 void UserClassifier::OnSuggestionsShown() { 87 double hours_since_last_time,
82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, 88 double discount_rate_per_hour) {
83 prefs::kUserClassifierLastTimeToShowSuggestions); 89 // Compute the new discounted average according to the formula
84 90 // avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events
85 double avg = GetEstimateHoursBetweenEvents( 91 return std::exp(-discount_rate_per_hour * hours_since_last_time) * old_value;
86 prefs::kUserClassifierAverageSuggestionsShownPerHour); 92 }
87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, 93
88 kMaxHours, 50); 94 // Compute the number of hours between two events for the given metric value
89 } 95 // assuming the events were equally distributed.
90 96 double GetEstimateHoursBetweenEvents(double metric_value,
91 void UserClassifier::OnSuggestionsUsed() { 97 double discount_rate_per_hour) {
92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, 98 // The computation below is well-defined only for |metric_value| > 1 (log of
93 prefs::kUserClassifierLastTimeToUseSuggestions); 99 // negative value or division by zero). When |metric_value| -> 1, the estimate
94 100 // below -> infinity, so kMaxHours is a natural result, here.
95 double avg = GetEstimateHoursBetweenEvents( 101 if (metric_value <= 1)
96 prefs::kUserClassifierAverageSuggestionsUsedPerHour);
97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
98 kMaxHours, 50);
99 }
100
101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
102 const char* last_time_pref_name) {
103 if (!pref_service_)
104 return;
105
106 double hours_since_last_time =
107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
108 // Ignore events within the same "browsing session".
109 if (hours_since_last_time < kMinHours)
110 return;
111 SetLastTimeToNow(last_time_pref_name);
112
113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
114 // Compute and store the new discounted average according to the formula
115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
116 double new_avg_events_per_hour =
117 1 +
118 std::exp(-discount_rate_per_hour_ * hours_since_last_time) *
119 avg_events_per_hour;
120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour);
121 }
122
123 double UserClassifier::GetEstimateHoursBetweenEvents(
124 const char* metric_pref_name) {
125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
126
127 // Right after the first update, the metric is equal to 1.
128 if (avg_events_per_hour <= 1)
129 return kMaxHours; 102 return kMaxHours;
130 103
131 // This is the estimate with the assumption that last event happened right 104 // This is the estimate with the assumption that last event happened right
132 // now and the system is in the steady-state. Solve estimate_hours in the 105 // now and the system is in the steady-state. Solve estimate_hours in the
133 // steady-state equation: 106 // steady-state equation:
134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, 107 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
135 // i.e. 108 // i.e.
136 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), 109 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
137 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), 110 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
138 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. 111 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
139 return std::min(kMaxHours, 112 double estimate_hours =
140 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / 113 std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour;
141 discount_rate_per_hour_); 114 return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
142 } 115 }
143 116
144 double UserClassifier::GetHoursSinceLastTime( 117 // The inverse of GetEstimateHoursBetweenEvents().
145 const char* last_time_pref_name) { 118 double GetMetricValueForEstimateHoursBetweenEvents(
146 if (!pref_service_->HasPrefPath(last_time_pref_name)) 119 double estimate_hours,
147 return DBL_MAX; 120 double discount_rate_per_hour) {
121 // Keep the input value within [kMinHours, kMaxHours].
122 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
123
124 // Return |metric_value| such that GetEstimateHoursBetweenEvents for
125 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
126 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
127 // i.e.
128 // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
129 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
130 return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
131 }
132
133 } // namespace
134
135 UserClassifier::UserClassifier(PrefService* pref_service)
136 : pref_service_(pref_service),
137 discount_rate_per_hour_(GetDiscountRatePerHour()) {
138 // The pref_service_ can be null in tests.
139 if (!pref_service_)
140 return;
141
142 // Initialize the prefs storing the last time: the counter has just started!
143 for (const Metric metric : kMetrics) {
144 if (!HasLastTime(metric))
145 SetLastTimeToNow(metric);
146 }
147 }
148
149 UserClassifier::~UserClassifier() {}
150
151 // static
152 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
153 for (Metric metric : kMetrics) {
154 double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents(
155 kDefaults[static_cast<int>(metric)], GetDiscountRatePerHour());
156 registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)],
157 default_metric_value);
158 registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0);
159 }
160 }
161
162 void UserClassifier::OnEvent(Metric metric) {
163 DCHECK_NE(metric, Metric::COUNT);
164 double metric_value = UpdateMetricOnEvent(metric);
165
166 double avg =
167 GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_);
168 switch (metric) {
169 case Metric::NTP_OPENED:
170 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
171 kMaxHours, 50);
172 break;
173 case Metric::SUGGESTIONS_SHOWN:
174 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg,
175 1, kMaxHours, 50);
176 break;
177 case Metric::SUGGESTIONS_USED:
178 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
179 1, kMaxHours, 50);
180 break;
181 case Metric::COUNT:
182 NOTREACHED();
183 break;
184 }
185 }
186
187 double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
188 DCHECK_NE(metric, Metric::COUNT);
189 double metric_value = GetUpToDateMetricValue(metric);
190 return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_);
191 }
192
193 UserClassifier::UserClass UserClassifier::GetUserClass() const {
194 if (GetEstimatedAvgTime(Metric::NTP_OPENED) >=
195 kOccasionalUserOpensNTPAtMostOncePerHours) {
196 return UserClass::RARE_NTP_USER;
197 }
198
199 if (GetEstimatedAvgTime(Metric::SUGGESTIONS_SHOWN) <=
200 kFrequentUserScrollsAtLeastOncePerHours) {
201 return UserClass::ACTIVE_SUGGESTIONS_CONSUMER;
202 }
203
204 return UserClass::ACTIVE_NTP_USER;
205 }
206
207 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
208 switch (GetUserClass()) {
209 case UserClass::RARE_NTP_USER:
210 return "Rare user of the NTP";
211 case UserClass::ACTIVE_NTP_USER:
212 return "Active user of the NTP";
213 case UserClass::ACTIVE_SUGGESTIONS_CONSUMER:
214 return "Active consumer of NTP suggestions";
215 }
216 NOTREACHED();
217 return std::string();
218 }
219
220 void UserClassifier::ClearClassificationForDebugging() {
221 // The pref_service_ can be null in tests.
222 if (!pref_service_)
223 return;
224
225 for (const Metric& metric : kMetrics) {
226 ClearMetricValue(metric);
227 SetLastTimeToNow(metric);
228 }
229 }
230
231 double UserClassifier::UpdateMetricOnEvent(Metric metric) {
232 // The pref_service_ can be null in tests.
233 if (!pref_service_)
234 return 0;
235
236 double hours_since_last_time =
237 std::min(kMaxHours, GetHoursSinceLastTime(metric));
238 // Ignore events within the same "browsing session".
239 if (hours_since_last_time < kMinHours)
240 return GetUpToDateMetricValue(metric);
241
242 SetLastTimeToNow(metric);
243
244 double metric_value = GetMetricValue(metric);
245 // Add 1 to the discounted metric as the event has happened right now.
246 double new_metric_value =
247 1 + DiscountMetric(metric_value, hours_since_last_time,
248 discount_rate_per_hour_);
249 SetMetricValue(metric, new_metric_value);
250 return new_metric_value;
251 }
252
253 double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
254 // The pref_service_ can be null in tests.
255 if (!pref_service_)
256 return 0;
257
258 double hours_since_last_time =
259 std::min(kMaxHours, GetHoursSinceLastTime(metric));
260
261 double metric_value = GetMetricValue(metric);
262 return DiscountMetric(metric_value, hours_since_last_time,
263 discount_rate_per_hour_);
264 }
265
266 double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
267 if (!HasLastTime(metric))
268 return 0;
148 269
149 base::TimeDelta since_last_time = 270 base::TimeDelta since_last_time =
150 base::Time::Now() - base::Time::FromInternalValue( 271 base::Time::Now() - base::Time::FromInternalValue(pref_service_->GetInt64(
151 pref_service_->GetInt64(last_time_pref_name)); 272 kLastTimeKeys[static_cast<int>(metric)]));
152 return since_last_time.InSecondsF() / 3600; 273 return since_last_time.InSecondsF() / 3600;
153 } 274 }
154 275
155 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { 276 bool UserClassifier::HasLastTime(Metric metric) const {
156 pref_service_->SetInt64(last_time_pref_name, 277 return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(metric)]);
278 }
279
280 void UserClassifier::SetLastTimeToNow(Metric metric) {
281 pref_service_->SetInt64(kLastTimeKeys[static_cast<int>(metric)],
157 base::Time::Now().ToInternalValue()); 282 base::Time::Now().ToInternalValue());
158 } 283 }
159 284
285 double UserClassifier::GetMetricValue(Metric metric) const {
286 return pref_service_->GetDouble(kMetricKeys[static_cast<int>(metric)]);
287 }
288
289 void UserClassifier::SetMetricValue(Metric metric, double metric_value) {
290 pref_service_->SetDouble(kMetricKeys[static_cast<int>(metric)], metric_value);
291 }
292
293 void UserClassifier::ClearMetricValue(Metric metric) {
294 pref_service_->ClearPref(kMetricKeys[static_cast<int>(metric)]);
295 }
296
160 } // namespace ntp_snippets 297 } // namespace ntp_snippets
OLDNEW
« no previous file with comments | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698