Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/ntp_snippets/user_classifier.h" | 5 #include "components/ntp_snippets/user_classifier.h" |
| 6 | 6 |
| 7 #include <float.h> | 7 #include <float.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <string> | 10 #include <string> |
| (...skipping 15 matching lines...) | |
| 26 // Never consider any larger interval than this (so that extreme situations such | 26 // Never consider any larger interval than this (so that extreme situations such |
| 27 // as losing your phone or going for a long offline vacation do not skew the | 27 // as losing your phone or going for a long offline vacation do not skew the |
| 28 // average too much). | 28 // average too much). |
| 29 const double kMaxHours = 7 * 24; | 29 const double kMaxHours = 7 * 24; |
| 30 | 30 |
| 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is | 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is |
| 32 // the length of the browsing session where subsequent events of the same type | 32 // the length of the browsing session where subsequent events of the same type |
| 33 // do not count again). | 33 // do not count again). |
| 34 const double kMinHours = 0.5; | 34 const double kMinHours = 0.5; |
| 35 | 35 |
| 36 // Classification constants. | |
| 37 const double kFrequentUserScrollsAtLeastOncePerHours = 24; | |
| 38 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72; | |
| 39 | |
| 40 // Default frequency values for new users. | |
|
Marc Treib
2016/09/19 15:20:16
Again, intervals, not frequencies
jkrcal
2016/09/19 18:45:25
Done.
| |
| 41 const double kNTPFrequencyOfANewUserInHours = 24; | |
| 42 const double kShowFrequencyOfANewUserInHours = 36; | |
| 43 const double kUseFrequencyOfANewUserInHours = 48; | |
| 44 | |
| 36 const char kHistogramAverageHoursToOpenNTP[] = | 45 const char kHistogramAverageHoursToOpenNTP[] = |
| 37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; | 46 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; |
| 38 const char kHistogramAverageHoursToShowSuggestions[] = | 47 const char kHistogramAverageHoursToShowSuggestions[] = |
| 39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; | 48 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; |
| 40 const char kHistogramAverageHoursToUseSuggestions[] = | 49 const char kHistogramAverageHoursToUseSuggestions[] = |
| 41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; | 50 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; |
| 42 | 51 |
| 52 // Computes the discount rate. | |
| 53 double GetDiscountRatePerHour() { | |
| 54 static double discount_rate_per_hour = 0; | |
| 55 | |
| 56 if (discount_rate_per_hour == 0) { | |
| 57 // Compute discount_rate_per_hour such that | |
| 58 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. | |
| 59 discount_rate_per_hour = (std::log(1 / (1 - kDiscountFactorPerDay)) / 24); | |
|
Marc Treib
2016/09/19 15:20:16
nit: remove the extra set of parens.
nit2: Can you
jkrcal
2016/09/19 18:45:25
Done the nits.
In a next CL, I want to override i
Marc Treib
2016/09/20 10:27:02
And you want the variation param to specify the pe
jkrcal
2016/09/20 13:10:13
I think the per-day value is better because it is
| |
| 60 } | |
| 61 | |
| 62 return discount_rate_per_hour; | |
| 63 } | |
| 64 | |
| 65 // Returns the new value of the metric using its |old_value|, assuming | |
|
Marc Treib
2016/09/19 15:20:15
What's the unit of the metric? What does the retur
jkrcal
2016/09/19 18:45:25
Hmm :) Nothing intuitive. I have added a comment i
Marc Treib
2016/09/20 10:27:02
Yup, that helps, thanks!
| |
| 66 // |hours_since_last_time| hours have passed since it was last recomputed. | |
| 67 // If |event_now| is true, the event is assumed to have happened right now, | |
| 68 // otherwise no event is assumed to happen within the last | |
| 69 // |hours_since_last_time| hours. | |
| 70 double RecomputeMetric(double old_value, | |
| 71 double hours_since_last_time, | |
| 72 bool event_now) { | |
| 73 // Compute and store the new discounted average according to the formula | |
| 74 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. | |
| 75 return (event_now ? 1 : 0) + | |
| 76 std::exp(-GetDiscountRatePerHour() * hours_since_last_time) * | |
| 77 old_value; | |
| 78 } | |
| 79 | |
| 80 // Compute the number of hours between two events for the given metric value | |
| 81 // assuming the events were equally distributed. | |
| 82 double GetEstimateHoursBetweenEvents(const double metric_value) { | |
| 83 // Right after the first update, the metric is equal to 1. | |
| 84 if (metric_value <= 1) | |
| 85 return kMaxHours; | |
| 86 | |
| 87 // This is the estimate with the assumption that last event happened right | |
| 88 // now and the system is in the steady-state. Solve estimate_hours in the | |
| 89 // steady-state equation: | |
| 90 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, | |
| 91 // i.e. | |
| 92 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), | |
| 93 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), | |
| 94 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. | |
| 95 double estimate_hours = | |
| 96 std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour(); | |
| 97 return std::max(kMinHours, std::min(kMaxHours, estimate_hours)); | |
| 98 } | |
| 99 | |
| 100 // The inverse of GetEstimateHoursBetweenEvents(). | |
| 101 double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) { | |
| 102 // Keep the input value within [kMinHours, kMaxHours]. | |
| 103 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours)); | |
| 104 | |
| 105 // Return |metric_value| such that GetEstimateHoursBetweenEvents for | |
| 106 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in | |
| 107 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, | |
| 108 // i.e. | |
| 109 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}). | |
| 110 return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours)); | |
| 111 } | |
| 112 | |
| 43 } // namespace | 113 } // namespace |
| 44 | 114 |
| 45 namespace ntp_snippets { | 115 namespace ntp_snippets { |
| 46 | 116 |
| 47 UserClassifier::UserClassifier(PrefService* pref_service) | 117 UserClassifier::UserClassifier(PrefService* pref_service) |
| 48 : pref_service_(pref_service), | 118 : pref_service_(pref_service) {} |
| 49 // Compute discount_rate_per_hour such that | |
| 50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. | |
| 51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} | |
| 52 | 119 |
| 53 UserClassifier::~UserClassifier() {} | 120 UserClassifier::~UserClassifier() {} |
| 54 | 121 |
| 55 // static | 122 // static |
| 56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { | 123 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { |
| 124 registry->RegisterDoublePref(prefs::kUserClassifierAverageNTPOpenedPerHour, | |
| 125 GetMetricValueForEstimateHoursBetweenEvents( | |
|
Marc Treib
2016/09/19 15:20:16
Hm, you're changing the defaults of existing prefs
jkrcal
2016/09/19 18:45:25
I think the default values are not stored anywhere
Marc Treib
2016/09/20 10:27:02
Yes, I mostly meant that users who used M54 briefl
jkrcal
2016/09/20 13:10:13
Not a big deal, IMO. The initial value has after a
| |
| 126 kNTPFrequencyOfANewUserInHours)); | |
| 57 registry->RegisterDoublePref( | 127 registry->RegisterDoublePref( |
| 58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); | 128 prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| 129 GetMetricValueForEstimateHoursBetweenEvents( | |
| 130 kShowFrequencyOfANewUserInHours)); | |
| 59 registry->RegisterDoublePref( | 131 registry->RegisterDoublePref( |
| 60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); | 132 prefs::kUserClassifierAverageSuggestionsUsedPerHour, |
| 61 registry->RegisterDoublePref( | 133 GetMetricValueForEstimateHoursBetweenEvents( |
| 62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); | 134 kUseFrequencyOfANewUserInHours)); |
| 63 | 135 |
| 64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); | 136 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); |
| 65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, | 137 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, |
| 66 0); | 138 0); |
| 67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, | 139 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, |
| 68 0); | 140 0); |
| 69 } | 141 } |
| 70 | 142 |
| 71 void UserClassifier::OnNTPOpened() { | 143 void UserClassifier::OnNTPOpened() { |
| 72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, | 144 double metric = |
| 73 prefs::kUserClassifierLastTimeToOpenNTP); | 145 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, |
| 146 prefs::kUserClassifierLastTimeToOpenNTP); | |
| 74 | 147 |
| 75 double avg = GetEstimateHoursBetweenEvents( | 148 double avg = GetEstimateHoursBetweenEvents(metric); |
| 76 prefs::kUserClassifierAverageNTPOpenedPerHour); | |
| 77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, | 149 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, |
| 78 kMaxHours, 50); | 150 kMaxHours, 50); |
| 79 } | 151 } |
| 80 | 152 |
| 81 void UserClassifier::OnSuggestionsShown() { | 153 void UserClassifier::OnSuggestionsShown() { |
| 82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, | 154 double metric = |
| 83 prefs::kUserClassifierLastTimeToShowSuggestions); | 155 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| 156 prefs::kUserClassifierLastTimeToShowSuggestions); | |
| 84 | 157 |
| 85 double avg = GetEstimateHoursBetweenEvents( | 158 double avg = GetEstimateHoursBetweenEvents(metric); |
| 86 prefs::kUserClassifierAverageSuggestionsShownPerHour); | |
| 87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, | 159 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, |
| 88 kMaxHours, 50); | 160 kMaxHours, 50); |
| 89 } | 161 } |
| 90 | 162 |
| 91 void UserClassifier::OnSuggestionsUsed() { | 163 void UserClassifier::OnSuggestionsUsed() { |
| 92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, | 164 double metric = |
| 93 prefs::kUserClassifierLastTimeToUseSuggestions); | 165 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, |
| 166 prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 94 | 167 |
| 95 double avg = GetEstimateHoursBetweenEvents( | 168 double avg = GetEstimateHoursBetweenEvents(metric); |
| 96 prefs::kUserClassifierAverageSuggestionsUsedPerHour); | |
| 97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, | 169 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, |
| 98 kMaxHours, 50); | 170 kMaxHours, 50); |
| 99 } | 171 } |
| 100 | 172 |
| 101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, | 173 double UserClassifier::GetEstimatedAvgTimeToOpenNTP() { |
| 102 const char* last_time_pref_name) { | 174 double metric = |
| 175 GetUpToDateMetricValue(prefs::kUserClassifierAverageNTPOpenedPerHour, | |
| 176 prefs::kUserClassifierLastTimeToOpenNTP); | |
| 177 return GetEstimateHoursBetweenEvents(metric); | |
| 178 } | |
| 179 | |
| 180 double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() { | |
| 181 double metric = GetUpToDateMetricValue( | |
| 182 prefs::kUserClassifierAverageSuggestionsShownPerHour, | |
| 183 prefs::kUserClassifierLastTimeToShowSuggestions); | |
| 184 | |
|
Marc Treib
2016/09/19 15:20:16
nit: extra empty line (the other similar methods d
jkrcal
2016/09/19 18:45:25
Done.
| |
| 185 return GetEstimateHoursBetweenEvents(metric); | |
| 186 } | |
| 187 | |
| 188 double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() { | |
| 189 double metric = GetUpToDateMetricValue( | |
| 190 prefs::kUserClassifierAverageSuggestionsUsedPerHour, | |
| 191 prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 192 return GetEstimateHoursBetweenEvents(metric); | |
| 193 } | |
| 194 | |
| 195 UserClassifier::UserClass UserClassifier::GetUserClass() { | |
| 196 if (GetEstimatedAvgTimeToOpenNTP() >= | |
| 197 kOccasionalUserOpensNTPAtMostOncePerHours) | |
| 198 return UserClass::OCCASIONAL_NTP_USER; | |
|
Marc Treib
2016/09/19 15:20:16
nit: Braces please
jkrcal
2016/09/19 18:45:25
Done.
| |
| 199 | |
| 200 if (GetEstimatedAvgTimeToUseSuggestions() <= | |
| 201 kFrequentUserScrollsAtLeastOncePerHours) | |
| 202 return UserClass::FREQUENT_NTP_USER; | |
|
Marc Treib
2016/09/19 15:20:16
Also here
jkrcal
2016/09/19 18:45:25
Done.
| |
| 203 | |
| 204 return UserClass::NORMAL_NTP_USER; | |
| 205 } | |
| 206 | |
| 207 std::string UserClassifier::GetUserClassDescription() { | |
| 208 switch (GetUserClass()) { | |
| 209 case UserClass::OCCASIONAL_NTP_USER: | |
| 210 return "Occasional user of the NTP"; | |
| 211 case UserClass::NORMAL_NTP_USER: | |
| 212 return "Normal user of the NTP"; | |
| 213 case UserClass::FREQUENT_NTP_USER: | |
| 214 return "Frequent user of content suggestions"; | |
|
Marc Treib
2016/09/19 15:20:16
This string is inconsistent with the others.
jkrcal
2016/09/19 18:45:25
Done.
| |
| 215 } | |
| 216 NOTREACHED(); | |
| 217 return "Unknown user class"; | |
| 218 } | |
| 219 | |
| 220 void UserClassifier::ClearClassificationForTesting() { | |
| 221 pref_service_->ClearPref(prefs::kUserClassifierAverageNTPOpenedPerHour); | |
| 222 pref_service_->ClearPref( | |
| 223 prefs::kUserClassifierAverageSuggestionsShownPerHour); | |
| 224 pref_service_->ClearPref(prefs::kUserClassifierAverageSuggestionsUsedPerHour); | |
| 225 | |
| 226 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToOpenNTP); | |
| 227 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToShowSuggestions); | |
| 228 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 229 } | |
| 230 | |
| 231 double UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, | |
| 232 const char* last_time_pref_name) { | |
| 103 if (!pref_service_) | 233 if (!pref_service_) |
|
Marc Treib
2016/09/19 15:20:16
Pre-existing, but: Can this ever happen? If so, pl
jkrcal
2016/09/19 18:45:25
Done.
| |
| 104 return; | 234 return 0; |
| 105 | 235 |
| 106 double hours_since_last_time = | 236 double hours_since_last_time = |
| 107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); | 237 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); |
| 108 // Ignore events within the same "browsing session". | 238 // Ignore events within the same "browsing session". |
| 109 if (hours_since_last_time < kMinHours) | 239 if (hours_since_last_time < kMinHours) |
| 110 return; | 240 return GetUpToDateMetricValue(metric_pref_name, last_time_pref_name); |
| 241 | |
| 111 SetLastTimeToNow(last_time_pref_name); | 242 SetLastTimeToNow(last_time_pref_name); |
| 112 | 243 |
| 113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | 244 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); |
| 114 // Compute and store the new discounted average according to the formula | |
| 115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. | |
| 116 double new_avg_events_per_hour = | 245 double new_avg_events_per_hour = |
| 117 1 + | 246 RecomputeMetric(avg_events_per_hour, hours_since_last_time, true); |
| 118 std::exp(discount_rate_per_hour_ * hours_since_last_time) * | |
| 119 avg_events_per_hour; | |
| 120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); | 247 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); |
| 248 return new_avg_events_per_hour; | |
| 121 } | 249 } |
| 122 | 250 |
| 123 double UserClassifier::GetEstimateHoursBetweenEvents( | 251 double UserClassifier::GetUpToDateMetricValue(const char* metric_pref_name, |
| 124 const char* metric_pref_name) { | 252 const char* last_time_pref_name) { |
| 253 if (!pref_service_) | |
| 254 return 0; | |
| 255 | |
| 256 double hours_since_last_time = | |
| 257 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); | |
| 258 | |
| 125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | 259 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); |
| 126 | 260 return RecomputeMetric(avg_events_per_hour, hours_since_last_time, true); |
| 127 // Right after the first update, the metric is equal to 1. | |
| 128 if (avg_events_per_hour <= 1) | |
| 129 return kMaxHours; | |
| 130 | |
| 131 // This is the estimate with the assumption that last event happened right | |
| 132 // now and the system is in the steady-state. Solve estimate_hours in the | |
| 133 // steady-state equation: | |
| 134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events. | |
| 135 return std::min(kMaxHours, | |
| 136 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / | |
| 137 discount_rate_per_hour_); | |
| 138 } | 261 } |
| 139 | 262 |
| 140 double UserClassifier::GetHoursSinceLastTime( | 263 double UserClassifier::GetHoursSinceLastTime( |
| 141 const char* last_time_pref_name) { | 264 const char* last_time_pref_name) { |
| 142 if (!pref_service_->HasPrefPath(last_time_pref_name)) | 265 if (!pref_service_->HasPrefPath(last_time_pref_name)) { |
| 143 return DBL_MAX; | 266 SetLastTimeToNow(last_time_pref_name); |
|
Marc Treib
2016/09/20 10:27:02
Was the reason for moving this out only so you can
jkrcal
2016/09/20 13:10:13
Mostly. I also think that it is clearer if a "Get"
| |
| 267 return 0; | |
| 268 } | |
| 144 | 269 |
| 145 base::TimeDelta since_last_time = | 270 base::TimeDelta since_last_time = |
| 146 base::Time::Now() - base::Time::FromInternalValue( | 271 base::Time::Now() - base::Time::FromInternalValue( |
| 147 pref_service_->GetInt64(last_time_pref_name)); | 272 pref_service_->GetInt64(last_time_pref_name)); |
| 148 return since_last_time.InSecondsF() / 3600; | 273 return since_last_time.InSecondsF() / 3600; |
| 149 } | 274 } |
| 150 | 275 |
| 151 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { | 276 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { |
| 152 pref_service_->SetInt64(last_time_pref_name, | 277 pref_service_->SetInt64(last_time_pref_name, |
| 153 base::Time::Now().ToInternalValue()); | 278 base::Time::Now().ToInternalValue()); |
| 154 } | 279 } |
| 155 | 280 |
| 156 } // namespace ntp_snippets | 281 } // namespace ntp_snippets |
| OLD | NEW |