Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Side by Side Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Marc's comments Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/user_classifier.h" 5 #include "components/ntp_snippets/user_classifier.h"
6 6
7 #include <float.h> 7 #include <float.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <string> 10 #include <string>
(...skipping 15 matching lines...) Expand all
26 // Never consider any larger interval than this (so that extreme situations such 26 // Never consider any larger interval than this (so that extreme situations such
27 // as losing your phone or going for a long offline vacation do not skew the 27 // as losing your phone or going for a long offline vacation do not skew the
28 // average too much). 28 // average too much).
29 const double kMaxHours = 7 * 24; 29 const double kMaxHours = 7 * 24;
30 30
31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
32 // the length of the browsing session where subsequent events of the same type 32 // the length of the browsing session where subsequent events of the same type
33 // do not count again). 33 // do not count again).
34 const double kMinHours = 0.5; 34 const double kMinHours = 0.5;
35 35
36 // Classification constants.
37 const double kFrequentUserScrollsAtLeastOncePerHours = 24;
38 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
39
40 // Default lengths of the intervals for new users.
41 const double kNTPFrequencyOfANewUserInHours = 24;
42 const double kShowFrequencyOfANewUserInHours = 36;
43 const double kUseFrequencyOfANewUserInHours = 48;
44
36 const char kHistogramAverageHoursToOpenNTP[] = 45 const char kHistogramAverageHoursToOpenNTP[] =
37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; 46 "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
38 const char kHistogramAverageHoursToShowSuggestions[] = 47 const char kHistogramAverageHoursToShowSuggestions[] =
39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; 48 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
40 const char kHistogramAverageHoursToUseSuggestions[] = 49 const char kHistogramAverageHoursToUseSuggestions[] =
41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; 50 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
42 51
52 // Computes the discount rate.
53 double GetDiscountRatePerHour() {
54 static double discount_rate_per_hour = 0.0;
55
56 if (discount_rate_per_hour == 0.0) {
57 // Compute discount_rate_per_hour such that
58 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
59 discount_rate_per_hour =
60 std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
61 }
62
63 return discount_rate_per_hour;
64 }
65
66 // Returns the new value of the metric using its |old_value|, assuming
67 // |hours_since_last_time| hours have passed since it was last recomputed.
68 // If |event_now| is true, the event is assumed to have happened right now,
69 // otherwise no event is assumed to happen within the last
70 // |hours_since_last_time| hours.
71 double RecomputeMetric(double old_value,
72 double hours_since_last_time,
73 bool event_now) {
74 // Compute and store the new discounted average according to the formula
75 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
76 return (event_now ? 1 : 0) +
77 std::exp(-GetDiscountRatePerHour() * hours_since_last_time) *
78 old_value;
79 }
80
81 // Compute the number of hours between two events for the given metric value
82 // assuming the events were equally distributed.
83 double GetEstimateHoursBetweenEvents(const double metric_value) {
84 // Right after the first update, the metric is equal to 1.
85 if (metric_value <= 1)
86 return kMaxHours;
87
88 // This is the estimate with the assumption that last event happened right
89 // now and the system is in the steady-state. Solve estimate_hours in the
90 // steady-state equation:
91 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
92 // i.e.
93 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
94 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
95 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
96 double estimate_hours =
97 std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour();
98 return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
99 }
100
101 // The inverse of GetEstimateHoursBetweenEvents().
102 double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) {
103 // Keep the input value within [kMinHours, kMaxHours].
104 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
105
106 // Return |metric_value| such that GetEstimateHoursBetweenEvents for
107 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
108 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
109 // i.e.
110 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
111 return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours));
112 }
113
43 } // namespace 114 } // namespace
44 115
45 namespace ntp_snippets { 116 namespace ntp_snippets {
46 117
47 UserClassifier::UserClassifier(PrefService* pref_service) 118 UserClassifier::UserClassifier(PrefService* pref_service)
48 : pref_service_(pref_service), 119 : pref_service_(pref_service) {}
49 // Compute discount_rate_per_hour such that
50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {}
52 120
53 UserClassifier::~UserClassifier() {} 121 UserClassifier::~UserClassifier() {}
54 122
55 // static 123 // static
56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { 124 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
125 registry->RegisterDoublePref(prefs::kUserClassifierAverageNTPOpenedPerHour,
126 GetMetricValueForEstimateHoursBetweenEvents(
127 kNTPFrequencyOfANewUserInHours));
57 registry->RegisterDoublePref( 128 registry->RegisterDoublePref(
58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); 129 prefs::kUserClassifierAverageSuggestionsShownPerHour,
130 GetMetricValueForEstimateHoursBetweenEvents(
131 kShowFrequencyOfANewUserInHours));
59 registry->RegisterDoublePref( 132 registry->RegisterDoublePref(
60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); 133 prefs::kUserClassifierAverageSuggestionsUsedPerHour,
61 registry->RegisterDoublePref( 134 GetMetricValueForEstimateHoursBetweenEvents(
62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); 135 kUseFrequencyOfANewUserInHours));
63 136
64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); 137 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0);
65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, 138 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions,
66 0); 139 0);
67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, 140 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions,
68 0); 141 0);
69 } 142 }
70 143
71 void UserClassifier::OnNTPOpened() { 144 void UserClassifier::OnNTPOpened() {
72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, 145 double metric =
73 prefs::kUserClassifierLastTimeToOpenNTP); 146 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
147 prefs::kUserClassifierLastTimeToOpenNTP);
74 148
75 double avg = GetEstimateHoursBetweenEvents( 149 double avg = GetEstimateHoursBetweenEvents(metric);
76 prefs::kUserClassifierAverageNTPOpenedPerHour);
77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, 150 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
78 kMaxHours, 50); 151 kMaxHours, 50);
79 } 152 }
80 153
81 void UserClassifier::OnSuggestionsShown() { 154 void UserClassifier::OnSuggestionsShown() {
82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, 155 double metric =
83 prefs::kUserClassifierLastTimeToShowSuggestions); 156 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
157 prefs::kUserClassifierLastTimeToShowSuggestions);
84 158
85 double avg = GetEstimateHoursBetweenEvents( 159 double avg = GetEstimateHoursBetweenEvents(metric);
86 prefs::kUserClassifierAverageSuggestionsShownPerHour);
87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, 160 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
88 kMaxHours, 50); 161 kMaxHours, 50);
89 } 162 }
90 163
91 void UserClassifier::OnSuggestionsUsed() { 164 void UserClassifier::OnSuggestionsUsed() {
92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, 165 double metric =
93 prefs::kUserClassifierLastTimeToUseSuggestions); 166 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
167 prefs::kUserClassifierLastTimeToUseSuggestions);
94 168
95 double avg = GetEstimateHoursBetweenEvents( 169 double avg = GetEstimateHoursBetweenEvents(metric);
96 prefs::kUserClassifierAverageSuggestionsUsedPerHour);
97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, 170 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
98 kMaxHours, 50); 171 kMaxHours, 50);
99 } 172 }
100 173
101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, 174 double UserClassifier::GetEstimatedAvgTimeToOpenNTP() const {
102 const char* last_time_pref_name) { 175 double metric =
176 GetUpToDateMetricValue(prefs::kUserClassifierAverageNTPOpenedPerHour,
177 prefs::kUserClassifierLastTimeToOpenNTP);
178 return GetEstimateHoursBetweenEvents(metric);
179 }
180
181 double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() const {
182 double metric = GetUpToDateMetricValue(
183 prefs::kUserClassifierAverageSuggestionsShownPerHour,
184 prefs::kUserClassifierLastTimeToShowSuggestions);
185 return GetEstimateHoursBetweenEvents(metric);
186 }
187
188 double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() const {
189 double metric = GetUpToDateMetricValue(
190 prefs::kUserClassifierAverageSuggestionsUsedPerHour,
191 prefs::kUserClassifierLastTimeToUseSuggestions);
192 return GetEstimateHoursBetweenEvents(metric);
193 }
194
195 UserClassifier::UserClass UserClassifier::GetUserClass() const {
196 if (GetEstimatedAvgTimeToOpenNTP() >=
197 kOccasionalUserOpensNTPAtMostOncePerHours) {
198 return UserClass::OCCASIONAL_NTP_USER;
199 }
200
201 if (GetEstimatedAvgTimeToUseSuggestions() <=
202 kFrequentUserScrollsAtLeastOncePerHours) {
203 return UserClass::FREQUENT_NTP_USER;
204 }
205
206 return UserClass::NORMAL_NTP_USER;
207 }
208
209 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
210 switch (GetUserClass()) {
211 case UserClass::OCCASIONAL_NTP_USER:
212 return "Occasional user of the NTP";
213 case UserClass::NORMAL_NTP_USER:
214 return "Normal user of the NTP";
215 case UserClass::FREQUENT_NTP_USER:
216 return "Frequent user of the NTP";
217 }
218 NOTREACHED();
219 return "Unknown user class";
220 }
221
222 void UserClassifier::ClearClassificationForDebugging() {
223 pref_service_->ClearPref(prefs::kUserClassifierAverageNTPOpenedPerHour);
224 pref_service_->ClearPref(
225 prefs::kUserClassifierAverageSuggestionsShownPerHour);
226 pref_service_->ClearPref(prefs::kUserClassifierAverageSuggestionsUsedPerHour);
227
228 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToOpenNTP);
229 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToShowSuggestions);
230 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToUseSuggestions);
231 }
232
233 double UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
234 const char* last_time_pref_name) {
235 // The pref_service_ can be null in tests.
103 if (!pref_service_) 236 if (!pref_service_)
104 return; 237 return 0;
105 238
106 double hours_since_last_time = 239 double hours_since_last_time =
107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); 240 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
241 // If the "last time" is not defined, set it.
242 if (!hours_since_last_time)
Marc Treib 2016/09/20 10:27:02 This will check for zero - is that what you want?
jkrcal 2016/09/20 13:10:13 I agree, this is a bit obscure, to say the least :) N
243 SetLastTimeToNow(last_time_pref_name);
108 // Ignore events within the same "browsing session". 244 // Ignore events within the same "browsing session".
109 if (hours_since_last_time < kMinHours) 245 if (hours_since_last_time < kMinHours)
110 return; 246 return GetUpToDateMetricValue(metric_pref_name, last_time_pref_name);
247
111 SetLastTimeToNow(last_time_pref_name); 248 SetLastTimeToNow(last_time_pref_name);
112 249
113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); 250 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
114 // Compute and store the new discounted average according to the formula
115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
116 double new_avg_events_per_hour = 251 double new_avg_events_per_hour =
117 1 + 252 RecomputeMetric(avg_events_per_hour, hours_since_last_time, true);
118 std::exp(discount_rate_per_hour_ * hours_since_last_time) *
119 avg_events_per_hour;
120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); 253 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour);
254 return new_avg_events_per_hour;
121 } 255 }
122 256
123 double UserClassifier::GetEstimateHoursBetweenEvents( 257 double UserClassifier::GetUpToDateMetricValue(
124 const char* metric_pref_name) { 258 const char* metric_pref_name,
259 const char* last_time_pref_name) const {
260 // The pref_service_ can be null in tests.
261 if (!pref_service_)
262 return 0;
263
264 double hours_since_last_time =
265 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
266
125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); 267 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
126 268 return RecomputeMetric(avg_events_per_hour, hours_since_last_time, true);
127 // Right after the first update, the metric is equal to 1.
128 if (avg_events_per_hour <= 1)
129 return kMaxHours;
130
131 // This is the estimate with the assumption that last event happened right
132 // now and the system is in the steady-state. Solve estimate_hours in the
133 // steady-state equation:
134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events.
135 return std::min(kMaxHours,
136 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) /
137 discount_rate_per_hour_);
138 } 269 }
139 270
140 double UserClassifier::GetHoursSinceLastTime( 271 double UserClassifier::GetHoursSinceLastTime(
141 const char* last_time_pref_name) { 272 const char* last_time_pref_name) const {
142 if (!pref_service_->HasPrefPath(last_time_pref_name)) 273 if (!pref_service_->HasPrefPath(last_time_pref_name))
143 return DBL_MAX; 274 return 0;
144 275
145 base::TimeDelta since_last_time = 276 base::TimeDelta since_last_time =
146 base::Time::Now() - base::Time::FromInternalValue( 277 base::Time::Now() - base::Time::FromInternalValue(
147 pref_service_->GetInt64(last_time_pref_name)); 278 pref_service_->GetInt64(last_time_pref_name));
148 return since_last_time.InSecondsF() / 3600; 279 return since_last_time.InSecondsF() / 3600;
149 } 280 }
150 281
151 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { 282 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) {
152 pref_service_->SetInt64(last_time_pref_name, 283 pref_service_->SetInt64(last_time_pref_name,
153 base::Time::Now().ToInternalValue()); 284 base::Time::Now().ToInternalValue());
154 } 285 }
155 286
156 } // namespace ntp_snippets 287 } // namespace ntp_snippets
OLDNEW
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698