Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/ntp_snippets/user_classifier.h" | 5 #include "components/ntp_snippets/user_classifier.h" |
| 6 | 6 |
| 7 #include <float.h> | 7 #include <float.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <string> | 10 #include <string> |
| 11 | 11 |
| 12 #include "base/metrics/histogram_macros.h" | 12 #include "base/metrics/histogram_macros.h" |
| 13 #include "base/strings/string_number_conversions.h" | 13 #include "base/strings/string_number_conversions.h" |
| 14 #include "components/ntp_snippets/pref_names.h" | 14 #include "components/ntp_snippets/pref_names.h" |
| 15 #include "components/prefs/pref_registry_simple.h" | 15 #include "components/prefs/pref_registry_simple.h" |
| 16 #include "components/prefs/pref_service.h" | 16 #include "components/prefs/pref_service.h" |
| 17 | 17 |
| 18 namespace ntp_snippets { | |
| 19 | |
| 18 namespace { | 20 namespace { |
| 19 | 21 |
| 20 // TODO(jkrcal): Make all of this configurable via variations_service. | 22 // TODO(jkrcal): Make all of this configurable via variations_service. |
| 21 | 23 |
| 22 // The discount factor for computing the discounted-average metrics. Must be | 24 // The discount factor for computing the discounted-average metrics. Must be |
| 23 // strictly larger than 0 and strictly smaller than 1! | 25 // strictly larger than 0 and strictly smaller than 1! |
| 24 const double kDiscountFactorPerDay = 0.25; | 26 const double kDiscountFactorPerDay = 0.25; |
| 25 | 27 |
| 26 // Never consider any larger interval than this (so that extreme situations such | 28 // Never consider any larger interval than this (so that extreme situations such |
| 27 // as losing your phone or going for a long offline vacation do not skew the | 29 // as losing your phone or going for a long offline vacation do not skew the |
| 28 // average too much). | 30 // average too much). |
| 29 const double kMaxHours = 7 * 24; | 31 const double kMaxHours = 7 * 24; |
| 30 | 32 |
| 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is | 33 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is |
| 32 // the length of the browsing session where subsequent events of the same type | 34 // the length of the browsing session where subsequent events of the same type |
| 33 // do not count again). | 35 // do not count again). |
| 34 const double kMinHours = 0.5; | 36 const double kMinHours = 0.5; |
| 35 | 37 |
| 38 // Classification constants. | |
| 39 const double kFrequentUserScrollsAtLeastOncePerHours = 24; | |
| 40 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72; | |
| 41 | |
| 36 const char kHistogramAverageHoursToOpenNTP[] = | 42 const char kHistogramAverageHoursToOpenNTP[] = |
| 37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; | 43 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; |
| 38 const char kHistogramAverageHoursToShowSuggestions[] = | 44 const char kHistogramAverageHoursToShowSuggestions[] = |
| 39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; | 45 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; |
| 40 const char kHistogramAverageHoursToUseSuggestions[] = | 46 const char kHistogramAverageHoursToUseSuggestions[] = |
| 41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; | 47 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; |
| 42 | 48 |
| 43 } // namespace | 49 // The enum used for iteration. |
| 44 | 50 const UserClassifier::Metric kMetrics[] = { |
| 45 namespace ntp_snippets { | 51 UserClassifier::Metric::NTP_OPENED, |
| 46 | 52 UserClassifier::Metric::SUGGESTIONS_SHOWN, |
| 47 UserClassifier::UserClassifier(PrefService* pref_service) | 53 UserClassifier::Metric::SUGGESTIONS_USED}; |
| 48 : pref_service_(pref_service), | 54 |
| 49 // Compute discount_rate_per_hour such that | 55 // The summary of the prefs. |
| 50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. | 56 const char* kMetricKeys[] = { |
| 51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} | 57 prefs::kUserClassifierAverageNTPOpenedPerHour, |
| 52 | 58 prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| 53 UserClassifier::~UserClassifier() {} | 59 prefs::kUserClassifierAverageSuggestionsUsedPerHour}; |
| 54 | 60 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP, |
| 55 // static | 61 prefs::kUserClassifierLastTimeToShowSuggestions, |
| 56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { | 62 prefs::kUserClassifierLastTimeToUseSuggestions}; |
| 57 registry->RegisterDoublePref( | 63 |
| 58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); | 64 // Default lengths of the intervals for new users for the metrics. |
| 59 registry->RegisterDoublePref( | 65 const double kDefaults[] = {24, 36, 48}; |
| 60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); | 66 |
| 61 registry->RegisterDoublePref( | 67 static_assert(arraysize(kMetrics) == |
| 62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); | 68 static_cast<int>(UserClassifier::Metric::COUNT) && |
| 63 | 69 arraysize(kMetricKeys) == |
| 64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); | 70 static_cast<int>(UserClassifier::Metric::COUNT) && |
| 65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, | 71 arraysize(kLastTimeKeys) == |
| 66 0); | 72 static_cast<int>(UserClassifier::Metric::COUNT) && |
| 67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, | 73 arraysize(kDefaults) == |
| 68 0); | 74 static_cast<int>(UserClassifier::Metric::COUNT), |
| 69 } | 75 "Fill in info for all metrics."); |
| 70 | 76 |
| 71 void UserClassifier::OnNTPOpened() { | 77 // Computes the discount rate. |
| 72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, | 78 double GetDiscountRatePerHour() { |
| 73 prefs::kUserClassifierLastTimeToOpenNTP); | 79 // Compute discount_rate_per_hour such that |
| 74 | 80 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. |
| 75 double avg = GetEstimateHoursBetweenEvents( | 81 return std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0; |
| 76 prefs::kUserClassifierAverageNTPOpenedPerHour); | 82 } |
| 77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, | 83 |
| 78 kMaxHours, 50); | 84 // Returns the new value of the metric using its |old_value|, assuming |
| 79 } | 85 // |hours_since_last_time| hours have passed since it was last recomputed. |
| 80 | 86 // If |event_now| is true, the event is assumed to have happened right now, |
| 81 void UserClassifier::OnSuggestionsShown() { | 87 // otherwise no event is assumed to happen within the last |
| 82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, | 88 // |hours_since_last_time| hours. |
| 83 prefs::kUserClassifierLastTimeToShowSuggestions); | 89 double RecomputeMetric(double old_value, |
| 84 | 90 double hours_since_last_time, |
| 85 double avg = GetEstimateHoursBetweenEvents( | 91 double discount_rate_per_hour, |
| 86 prefs::kUserClassifierAverageSuggestionsShownPerHour); | 92 bool event_now) { |
| 87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, | |
| 88 kMaxHours, 50); | |
| 89 } | |
| 90 | |
| 91 void UserClassifier::OnSuggestionsUsed() { | |
| 92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, | |
| 93 prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 94 | |
| 95 double avg = GetEstimateHoursBetweenEvents( | |
| 96 prefs::kUserClassifierAverageSuggestionsUsedPerHour); | |
| 97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, | |
| 98 kMaxHours, 50); | |
| 99 } | |
| 100 | |
| 101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, | |
| 102 const char* last_time_pref_name) { | |
| 103 if (!pref_service_) | |
| 104 return; | |
| 105 | |
| 106 double hours_since_last_time = | |
| 107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); | |
| 108 // Ignore events within the same "browsing session". | |
| 109 if (hours_since_last_time < kMinHours) | |
| 110 return; | |
| 111 SetLastTimeToNow(last_time_pref_name); | |
| 112 | |
| 113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | |
| 114 // Compute and store the new discounted average according to the formula | 93 // Compute and store the new discounted average according to the formula |
| 115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. | 94 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. |
| 116 double new_avg_events_per_hour = | 95 return (event_now ? 1 : 0) + |
| 117 1 + | 96 std::exp(-discount_rate_per_hour * hours_since_last_time) * |
| 118 std::exp(-discount_rate_per_hour_ * hours_since_last_time) * | 97 old_value; |
| 119 avg_events_per_hour; | 98 } |
| 120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); | 99 |
| 121 } | 100 // Compute the number of hours between two events for the given metric value |
| 122 | 101 // assuming the events were equally distributed. |
| 123 double UserClassifier::GetEstimateHoursBetweenEvents( | 102 double GetEstimateHoursBetweenEvents(double metric_value, |
| 124 const char* metric_pref_name) { | 103 double discount_rate_per_hour) { |
| 125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | 104 // The computation below is well-defined only for |metric_value| > 1 (log of |
| 126 | 105 // negative value or division by zero). When |metric_value| -> 1, the estimate |
| 127 // Right after the first update, the metric is equal to 1. | 106 // below -> infinity, so kMaxHours is a natural result, here. |
| 128 if (avg_events_per_hour <= 1) | 107 if (metric_value <= 1) |
| 129 return kMaxHours; | 108 return kMaxHours; |
| 130 | 109 |
| 131 // This is the estimate with the assumption that last event happened right | 110 // This is the estimate with the assumption that last event happened right |
| 132 // now and the system is in the steady-state. Solve estimate_hours in the | 111 // now and the system is in the steady-state. Solve estimate_hours in the |
| 133 // steady-state equation: | 112 // steady-state equation: |
| 134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, | 113 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, |
| 135 // i.e. | 114 // i.e. |
| 136 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), | 115 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value), |
| 137 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), | 116 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)), |
| 138 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. | 117 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate. |
| 139 return std::min(kMaxHours, | 118 double estimate_hours = |
| 140 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / | 119 std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour; |
| 141 discount_rate_per_hour_); | 120 return std::max(kMinHours, std::min(kMaxHours, estimate_hours)); |
| 142 } | 121 } |
| 143 | 122 |
| 144 double UserClassifier::GetHoursSinceLastTime( | 123 // The inverse of GetEstimateHoursBetweenEvents(). |
| 145 const char* last_time_pref_name) { | 124 double GetMetricValueForEstimateHoursBetweenEvents( |
| 146 if (!pref_service_->HasPrefPath(last_time_pref_name)) | 125 double estimate_hours, |
| 147 return DBL_MAX; | 126 double discount_rate_per_hour) { |
| 127 // Keep the input value within [kMinHours, kMaxHours]. | |
| 128 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours)); | |
| 129 | |
| 130 // Return |metric_value| such that GetEstimateHoursBetweenEvents for | |
| 131 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in | |
| 132 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, | |
| 133 // i.e. | |
| 134 // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1, | |
| 135 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}). | |
| 136 return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours)); | |
| 137 } | |
| 138 | |
| 139 } // namespace | |
| 140 | |
| 141 UserClassifier::UserClassifier(PrefService* pref_service) | |
| 142 : pref_service_(pref_service), | |
| 143 discount_rate_per_hour_(GetDiscountRatePerHour()) { | |
| 144 // The pref_service_ can be null in tests. | |
| 145 if (!pref_service_) | |
| 146 return; | |
| 147 | |
| 148 // Initialize the prefs storing the last time: the counter has just started! | |
| 149 for (const Metric metric : kMetrics) { | |
| 150 if (!HasLastTime(metric)) | |
| 151 SetLastTimeToNow(metric); | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 UserClassifier::~UserClassifier() {} | |
| 156 | |
| 157 // static | |
| 158 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { | |
| 159 for (Metric metric : kMetrics) { | |
| 160 double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents( | |
| 161 kDefaults[static_cast<int>(metric)], GetDiscountRatePerHour()); | |
| 162 registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)], | |
| 163 default_metric_value); | |
| 164 registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0); | |
| 165 } | |
| 166 } | |
| 167 | |
| 168 void UserClassifier::OnEvent(Metric metric) { | |
| 169 DCHECK_NE(static_cast<int>(metric), static_cast<int>(Metric::COUNT)); | |
|
> **Marc Treib** (2016/09/20 13:53:42): Ah, the casts are required because the enum class […]
>
> **jkrcal** (2016/09/20 14:26:34): Ah, my mistake, the casts are not required. Thanks […]
| |
| 170 double metric_value = UpdateMetricOnEvent(metric); | |
| 171 | |
| 172 double avg = | |
| 173 GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_); | |
| 174 switch (metric) { | |
| 175 case Metric::NTP_OPENED: | |
| 176 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, | |
| 177 kMaxHours, 50); | |
| 178 break; | |
| 179 case Metric::SUGGESTIONS_SHOWN: | |
| 180 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, | |
| 181 1, kMaxHours, 50); | |
| 182 break; | |
| 183 case Metric::SUGGESTIONS_USED: | |
| 184 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, | |
| 185 1, kMaxHours, 50); | |
| 186 break; | |
| 187 case Metric::COUNT: | |
| 188 NOTREACHED(); | |
| 189 break; | |
| 190 } | |
| 191 } | |
| 192 | |
| 193 double UserClassifier::GetEstimatedAvgTime(Metric metric) const { | |
| 194 DCHECK_NE(static_cast<int>(metric), static_cast<int>(Metric::COUNT)); | |
| 195 double metric_value = GetUpToDateMetricValue(metric); | |
| 196 return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_); | |
| 197 } | |
| 198 | |
| 199 UserClassifier::UserClass UserClassifier::GetUserClass() const { | |
| 200 if (GetEstimatedAvgTime(Metric::NTP_OPENED) >= | |
| 201 kOccasionalUserOpensNTPAtMostOncePerHours) { | |
| 202 return UserClass::RARE_NTP_USER; | |
| 203 } | |
| 204 | |
| 205 if (GetEstimatedAvgTime(Metric::SUGGESTIONS_SHOWN) <= | |
| 206 kFrequentUserScrollsAtLeastOncePerHours) { | |
| 207 return UserClass::ACTIVE_SUGGESTIONS_CONSUMER; | |
| 208 } | |
| 209 | |
| 210 return UserClass::ACTIVE_NTP_USER; | |
| 211 } | |
| 212 | |
| 213 std::string UserClassifier::GetUserClassDescriptionForDebugging() const { | |
| 214 switch (GetUserClass()) { | |
| 215 case UserClass::RARE_NTP_USER: | |
| 216 return "Rare user of the NTP"; | |
| 217 case UserClass::ACTIVE_NTP_USER: | |
| 218 return "Active user of the NTP"; | |
| 219 case UserClass::ACTIVE_SUGGESTIONS_CONSUMER: | |
| 220 return "Active consumer of NTP suggestions"; | |
| 221 } | |
| 222 NOTREACHED(); | |
| 223 return "Unknown user class"; | |
| 224 } | |
| 225 | |
| 226 void UserClassifier::ClearClassificationForDebugging() { | |
| 227 // The pref_service_ can be null in tests. | |
| 228 if (!pref_service_) | |
| 229 return; | |
| 230 | |
| 231 for (const Metric& metric : kMetrics) { | |
| 232 ClearMetricValue(metric); | |
| 233 SetLastTimeToNow(metric); | |
| 234 } | |
| 235 } | |
| 236 | |
| 237 double UserClassifier::UpdateMetricOnEvent(Metric metric) { | |
| 238 // The pref_service_ can be null in tests. | |
| 239 if (!pref_service_) | |
| 240 return 0; | |
| 241 | |
| 242 double hours_since_last_time = | |
| 243 std::min(kMaxHours, GetHoursSinceLastTime(metric)); | |
| 244 // Ignore events within the same "browsing session". | |
| 245 if (hours_since_last_time < kMinHours) | |
| 246 return GetUpToDateMetricValue(metric); | |
| 247 | |
| 248 SetLastTimeToNow(metric); | |
| 249 | |
| 250 double metric_value = GetMetricValue(metric); | |
| 251 double new_metric_value = | |
| 252 RecomputeMetric(metric_value, hours_since_last_time, | |
| 253 discount_rate_per_hour_, /*event_now=*/true); | |
| 254 SetMetricValue(metric, new_metric_value); | |
| 255 return new_metric_value; | |
| 256 } | |
| 257 | |
| 258 double UserClassifier::GetUpToDateMetricValue(Metric metric) const { | |
| 259 // The pref_service_ can be null in tests. | |
| 260 if (!pref_service_) | |
| 261 return 0; | |
| 262 | |
| 263 double hours_since_last_time = | |
| 264 std::min(kMaxHours, GetHoursSinceLastTime(metric)); | |
| 265 | |
| 266 double metric_value = GetMetricValue(metric); | |
| 267 return RecomputeMetric(metric_value, hours_since_last_time, | |
| 268 discount_rate_per_hour_, /*event_now=*/false); | |
| 269 } | |
| 270 | |
| 271 double UserClassifier::GetHoursSinceLastTime(Metric metric) const { | |
| 272 if (!HasLastTime(metric)) | |
| 273 return 0; | |
| 148 | 274 |
| 149 base::TimeDelta since_last_time = | 275 base::TimeDelta since_last_time = |
| 150 base::Time::Now() - base::Time::FromInternalValue( | 276 base::Time::Now() - base::Time::FromInternalValue(pref_service_->GetInt64( |
| 151 pref_service_->GetInt64(last_time_pref_name)); | 277 kLastTimeKeys[static_cast<int>(metric)])); |
| 152 return since_last_time.InSecondsF() / 3600; | 278 return since_last_time.InSecondsF() / 3600; |
| 153 } | 279 } |
| 154 | 280 |
| 155 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { | 281 bool UserClassifier::HasLastTime(Metric metric) const { |
| 156 pref_service_->SetInt64(last_time_pref_name, | 282 return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(metric)]); |
| 283 } | |
| 284 | |
| 285 void UserClassifier::SetLastTimeToNow(Metric metric) { | |
| 286 pref_service_->SetInt64(kLastTimeKeys[static_cast<int>(metric)], | |
| 157 base::Time::Now().ToInternalValue()); | 287 base::Time::Now().ToInternalValue()); |
| 158 } | 288 } |
| 159 | 289 |
| 290 double UserClassifier::GetMetricValue(Metric metric) const { | |
| 291 return pref_service_->GetDouble(kMetricKeys[static_cast<int>(metric)]); | |
| 292 } | |
| 293 | |
| 294 void UserClassifier::SetMetricValue(Metric metric, double metric_value) { | |
| 295 pref_service_->SetDouble(kMetricKeys[static_cast<int>(metric)], metric_value); | |
| 296 } | |
| 297 | |
| 298 void UserClassifier::ClearMetricValue(Metric metric) { | |
| 299 pref_service_->ClearPref(kMetricKeys[static_cast<int>(metric)]); | |
| 300 } | |
| 301 | |
| 160 } // namespace ntp_snippets | 302 } // namespace ntp_snippets |
| OLD | NEW |