Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/user_classifier.h" 5 #include "components/ntp_snippets/user_classifier.h"
6 6
7 #include <float.h> 7 #include <float.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <string> 10 #include <string>
(...skipping 15 matching lines...) Expand all
// Upper bound, in hours, on any interval taken into account (so that extreme
// situations such as losing your phone or going for a long offline vacation do
// not skew the average too much).
constexpr double kMaxHours = 24 * 7;

// Length, in hours, of a browsing session: events within |kMinHours| hours
// since the last event of the same type do not count again.
constexpr double kMinHours = 0.5;

// Classification thresholds, expressed as intervals in hours.
constexpr double kFrequentUserScrollsAtLeastOncePerHours = 24;
constexpr double kOccasionalUserOpensNTPAtMostOncePerHours = 72;

// Default intervals between events, in hours, assumed for new users. (The
// constant names say "Frequency", but the values are intervals.)
constexpr double kNTPFrequencyOfANewUserInHours = 24;
constexpr double kShowFrequencyOfANewUserInHours = 36;
constexpr double kUseFrequencyOfANewUserInHours = 48;

// Histogram names, one per tracked event type.
constexpr char kHistogramAverageHoursToOpenNTP[] =
    "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
constexpr char kHistogramAverageHoursToShowSuggestions[] =
    "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
constexpr char kHistogramAverageHoursToUseSuggestions[] =
    "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
42 51
52 // Computes the discount rate.
53 double GetDiscountRatePerHour() {
54 static double discount_rate_per_hour = 0;
55
56 if (discount_rate_per_hour == 0) {
57 // Compute discount_rate_per_hour such that
58 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
59 discount_rate_per_hour = (std::log(1 / (1 - kDiscountFactorPerDay)) / 24);
Marc Treib 2016/09/19 15:20:16 nit: remove the extra set of parens. nit2: Can you
jkrcal 2016/09/19 18:45:25 Done the nits. In a next CL, I want to override i
Marc Treib 2016/09/20 10:27:02 And you want the variation param to specify the pe
jkrcal 2016/09/20 13:10:13 I think the per-day value is better because it is
60 }
61
62 return discount_rate_per_hour;
63 }
64
65 // Returns the new value of the metric using its |old_value|, assuming
Marc Treib 2016/09/19 15:20:15 What's the unit of the metric? What does the retur
jkrcal 2016/09/19 18:45:25 Hmm :) Nothing intuitive. I have added a comment i
Marc Treib 2016/09/20 10:27:02 Yup, that helps, thanks!
66 // |hours_since_last_time| hours have passed since it was last recomputed.
67 // If |event_now| is true, the event is assumed to have happened right now,
68 // otherwise no event is assumed to happen within the last
69 // |hours_since_last_time| hours.
70 double RecomputeMetric(double old_value,
71 double hours_since_last_time,
72 bool event_now) {
73 // Compute and store the new discounted average according to the formula
74 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
75 return (event_now ? 1 : 0) +
76 std::exp(-GetDiscountRatePerHour() * hours_since_last_time) *
77 old_value;
78 }
79
80 // Compute the number of hours between two events for the given metric value
81 // assuming the events were equally distributed.
82 double GetEstimateHoursBetweenEvents(const double metric_value) {
83 // Right after the first update, the metric is equal to 1.
84 if (metric_value <= 1)
85 return kMaxHours;
86
87 // This is the estimate with the assumption that last event happened right
88 // now and the system is in the steady-state. Solve estimate_hours in the
89 // steady-state equation:
90 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
91 // i.e.
92 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events),
93 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)),
94 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate.
95 double estimate_hours =
96 std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour();
97 return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
98 }
99
100 // The inverse of GetEstimateHoursBetweenEvents().
101 double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) {
102 // Keep the input value within [kMinHours, kMaxHours].
103 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
104
105 // Return |metric_value| such that GetEstimateHoursBetweenEvents for
106 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
107 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
108 // i.e.
109 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
110 return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours));
111 }
112
43 } // namespace 113 } // namespace
44 114
45 namespace ntp_snippets { 115 namespace ntp_snippets {
46 116
47 UserClassifier::UserClassifier(PrefService* pref_service) 117 UserClassifier::UserClassifier(PrefService* pref_service)
48 : pref_service_(pref_service), 118 : pref_service_(pref_service) {}
49 // Compute discount_rate_per_hour such that
50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {}
52 119
53 UserClassifier::~UserClassifier() {} 120 UserClassifier::~UserClassifier() {}
54 121
55 // static 122 // static
56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { 123 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
124 registry->RegisterDoublePref(prefs::kUserClassifierAverageNTPOpenedPerHour,
125 GetMetricValueForEstimateHoursBetweenEvents(
Marc Treib 2016/09/19 15:20:16 Hm, you're changing the defaults of existing prefs
jkrcal 2016/09/19 18:45:25 I think the default values are not stored anywhere
Marc Treib 2016/09/20 10:27:02 Yes, I mostly meant that users who used M54 briefl
jkrcal 2016/09/20 13:10:13 Not a big deal, IMO. The initial value has after a
126 kNTPFrequencyOfANewUserInHours));
57 registry->RegisterDoublePref( 127 registry->RegisterDoublePref(
58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); 128 prefs::kUserClassifierAverageSuggestionsShownPerHour,
129 GetMetricValueForEstimateHoursBetweenEvents(
130 kShowFrequencyOfANewUserInHours));
59 registry->RegisterDoublePref( 131 registry->RegisterDoublePref(
60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); 132 prefs::kUserClassifierAverageSuggestionsUsedPerHour,
61 registry->RegisterDoublePref( 133 GetMetricValueForEstimateHoursBetweenEvents(
62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); 134 kUseFrequencyOfANewUserInHours));
63 135
64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); 136 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0);
65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, 137 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions,
66 0); 138 0);
67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, 139 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions,
68 0); 140 0);
69 } 141 }
70 142
71 void UserClassifier::OnNTPOpened() { 143 void UserClassifier::OnNTPOpened() {
72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, 144 double metric =
73 prefs::kUserClassifierLastTimeToOpenNTP); 145 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
146 prefs::kUserClassifierLastTimeToOpenNTP);
74 147
75 double avg = GetEstimateHoursBetweenEvents( 148 double avg = GetEstimateHoursBetweenEvents(metric);
76 prefs::kUserClassifierAverageNTPOpenedPerHour);
77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, 149 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
78 kMaxHours, 50); 150 kMaxHours, 50);
79 } 151 }
80 152
81 void UserClassifier::OnSuggestionsShown() { 153 void UserClassifier::OnSuggestionsShown() {
82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, 154 double metric =
83 prefs::kUserClassifierLastTimeToShowSuggestions); 155 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
156 prefs::kUserClassifierLastTimeToShowSuggestions);
84 157
85 double avg = GetEstimateHoursBetweenEvents( 158 double avg = GetEstimateHoursBetweenEvents(metric);
86 prefs::kUserClassifierAverageSuggestionsShownPerHour);
87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, 159 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
88 kMaxHours, 50); 160 kMaxHours, 50);
89 } 161 }
90 162
91 void UserClassifier::OnSuggestionsUsed() { 163 void UserClassifier::OnSuggestionsUsed() {
92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, 164 double metric =
93 prefs::kUserClassifierLastTimeToUseSuggestions); 165 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
166 prefs::kUserClassifierLastTimeToUseSuggestions);
94 167
95 double avg = GetEstimateHoursBetweenEvents( 168 double avg = GetEstimateHoursBetweenEvents(metric);
96 prefs::kUserClassifierAverageSuggestionsUsedPerHour);
97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, 169 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
98 kMaxHours, 50); 170 kMaxHours, 50);
99 } 171 }
100 172
101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, 173 double UserClassifier::GetEstimatedAvgTimeToOpenNTP() {
102 const char* last_time_pref_name) { 174 double metric =
175 GetUpToDateMetricValue(prefs::kUserClassifierAverageNTPOpenedPerHour,
176 prefs::kUserClassifierLastTimeToOpenNTP);
177 return GetEstimateHoursBetweenEvents(metric);
178 }
179
180 double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() {
181 double metric = GetUpToDateMetricValue(
182 prefs::kUserClassifierAverageSuggestionsShownPerHour,
183 prefs::kUserClassifierLastTimeToShowSuggestions);
184
Marc Treib 2016/09/19 15:20:16 nit: extra empty line (the other similar methods d
jkrcal 2016/09/19 18:45:25 Done.
185 return GetEstimateHoursBetweenEvents(metric);
186 }
187
188 double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() {
189 double metric = GetUpToDateMetricValue(
190 prefs::kUserClassifierAverageSuggestionsUsedPerHour,
191 prefs::kUserClassifierLastTimeToUseSuggestions);
192 return GetEstimateHoursBetweenEvents(metric);
193 }
194
195 UserClassifier::UserClass UserClassifier::GetUserClass() {
196 if (GetEstimatedAvgTimeToOpenNTP() >=
197 kOccasionalUserOpensNTPAtMostOncePerHours)
198 return UserClass::OCCASIONAL_NTP_USER;
Marc Treib 2016/09/19 15:20:16 nit: Braces please
jkrcal 2016/09/19 18:45:25 Done.
199
200 if (GetEstimatedAvgTimeToUseSuggestions() <=
201 kFrequentUserScrollsAtLeastOncePerHours)
202 return UserClass::FREQUENT_NTP_USER;
Marc Treib 2016/09/19 15:20:16 Also here
jkrcal 2016/09/19 18:45:25 Done.
203
204 return UserClass::NORMAL_NTP_USER;
205 }
206
207 std::string UserClassifier::GetUserClassDescription() {
208 switch (GetUserClass()) {
209 case UserClass::OCCASIONAL_NTP_USER:
210 return "Occasional user of the NTP";
211 case UserClass::NORMAL_NTP_USER:
212 return "Normal user of the NTP";
213 case UserClass::FREQUENT_NTP_USER:
214 return "Frequent user of content suggestions";
Marc Treib 2016/09/19 15:20:16 This string is inconsistent with the others.
jkrcal 2016/09/19 18:45:25 Done.
215 }
216 NOTREACHED();
217 return "Unknown user class";
218 }
219
220 void UserClassifier::ClearClassificationForTesting() {
221 pref_service_->ClearPref(prefs::kUserClassifierAverageNTPOpenedPerHour);
222 pref_service_->ClearPref(
223 prefs::kUserClassifierAverageSuggestionsShownPerHour);
224 pref_service_->ClearPref(prefs::kUserClassifierAverageSuggestionsUsedPerHour);
225
226 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToOpenNTP);
227 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToShowSuggestions);
228 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToUseSuggestions);
229 }
230
231 double UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
232 const char* last_time_pref_name) {
103 if (!pref_service_) 233 if (!pref_service_)
Marc Treib 2016/09/19 15:20:16 Pre-existing, but: Can this ever happen? If so, pl
jkrcal 2016/09/19 18:45:25 Done.
104 return; 234 return 0;
105 235
106 double hours_since_last_time = 236 double hours_since_last_time =
107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); 237 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
108 // Ignore events within the same "browsing session". 238 // Ignore events within the same "browsing session".
109 if (hours_since_last_time < kMinHours) 239 if (hours_since_last_time < kMinHours)
110 return; 240 return GetUpToDateMetricValue(metric_pref_name, last_time_pref_name);
241
111 SetLastTimeToNow(last_time_pref_name); 242 SetLastTimeToNow(last_time_pref_name);
112 243
113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); 244 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
114 // Compute and store the new discounted average according to the formula
115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
116 double new_avg_events_per_hour = 245 double new_avg_events_per_hour =
117 1 + 246 RecomputeMetric(avg_events_per_hour, hours_since_last_time, true);
118 std::exp(discount_rate_per_hour_ * hours_since_last_time) *
119 avg_events_per_hour;
120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); 247 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour);
248 return new_avg_events_per_hour;
121 } 249 }
122 250
123 double UserClassifier::GetEstimateHoursBetweenEvents( 251 double UserClassifier::GetUpToDateMetricValue(const char* metric_pref_name,
124 const char* metric_pref_name) { 252 const char* last_time_pref_name) {
253 if (!pref_service_)
254 return 0;
255
256 double hours_since_last_time =
257 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
258
125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); 259 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
126 260 return RecomputeMetric(avg_events_per_hour, hours_since_last_time, true);
127 // Right after the first update, the metric is equal to 1.
128 if (avg_events_per_hour <= 1)
129 return kMaxHours;
130
131 // This is the estimate with the assumption that last event happened right
132 // now and the system is in the steady-state. Solve estimate_hours in the
133 // steady-state equation:
134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events.
135 return std::min(kMaxHours,
136 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) /
137 discount_rate_per_hour_);
138 } 261 }
139 262
140 double UserClassifier::GetHoursSinceLastTime( 263 double UserClassifier::GetHoursSinceLastTime(
141 const char* last_time_pref_name) { 264 const char* last_time_pref_name) {
142 if (!pref_service_->HasPrefPath(last_time_pref_name)) 265 if (!pref_service_->HasPrefPath(last_time_pref_name)) {
143 return DBL_MAX; 266 SetLastTimeToNow(last_time_pref_name);
Marc Treib 2016/09/20 10:27:02 Was the reason for moving this out only so you can
jkrcal 2016/09/20 13:10:13 Mostly. I also think that it is clearer if a "Get"
267 return 0;
268 }
144 269
145 base::TimeDelta since_last_time = 270 base::TimeDelta since_last_time =
146 base::Time::Now() - base::Time::FromInternalValue( 271 base::Time::Now() - base::Time::FromInternalValue(
147 pref_service_->GetInt64(last_time_pref_name)); 272 pref_service_->GetInt64(last_time_pref_name));
148 return since_last_time.InSecondsF() / 3600; 273 return since_last_time.InSecondsF() / 3600;
149 } 274 }
150 275
151 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { 276 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) {
152 pref_service_->SetInt64(last_time_pref_name, 277 pref_service_->SetInt64(last_time_pref_name,
153 base::Time::Now().ToInternalValue()); 278 base::Time::Now().ToInternalValue());
154 } 279 }
155 280
156 } // namespace ntp_snippets 281 } // namespace ntp_snippets
OLDNEW
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698