| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/ntp_snippets/user_classifier.h" | 5 #include "components/ntp_snippets/user_classifier.h" |
| 6 | 6 |
| 7 #include <float.h> | 7 #include <float.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <string> | 10 #include <string> |
| 11 | 11 |
| 12 #include "base/metrics/histogram_macros.h" | 12 #include "base/metrics/histogram_macros.h" |
| 13 #include "base/strings/string_number_conversions.h" | 13 #include "base/strings/string_number_conversions.h" |
| 14 #include "components/ntp_snippets/pref_names.h" | 14 #include "components/ntp_snippets/pref_names.h" |
| 15 #include "components/prefs/pref_registry_simple.h" | 15 #include "components/prefs/pref_registry_simple.h" |
| 16 #include "components/prefs/pref_service.h" | 16 #include "components/prefs/pref_service.h" |
| 17 | 17 |
| 18 namespace ntp_snippets { |
| 19 |
| 18 namespace { | 20 namespace { |
| 19 | 21 |
| 20 // TODO(jkrcal): Make all of this configurable via variations_service. | 22 // TODO(jkrcal): Make all of this configurable via variations_service. |
| 21 | 23 |
| 22 // The discount factor for computing the discounted-average metrics. Must be | 24 // The discount factor for computing the discounted-average metrics. Must be |
| 23 // strictly larger than 0 and strictly smaller than 1! | 25 // strictly larger than 0 and strictly smaller than 1! |
| 24 const double kDiscountFactorPerDay = 0.25; | 26 const double kDiscountFactorPerDay = 0.25; |
| 25 | 27 |
| 26 // Never consider any larger interval than this (so that extreme situations such | 28 // Never consider any larger interval than this (so that extreme situations such |
| 27 // as losing your phone or going for a long offline vacation do not skew the | 29 // as losing your phone or going for a long offline vacation do not skew the |
| 28 // average too much). | 30 // average too much). |
| 29 const double kMaxHours = 7 * 24; | 31 const double kMaxHours = 7 * 24; |
| 30 | 32 |
| 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is | 33 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is |
| 32 // the length of the browsing session where subsequent events of the same type | 34 // the length of the browsing session where subsequent events of the same type |
| 33 // do not count again). | 35 // do not count again). |
| 34 const double kMinHours = 0.5; | 36 const double kMinHours = 0.5; |
| 35 | 37 |
| 38 // Classification thresholds, in estimated hours between events (see GetUserClass()). |
| 39 const double kFrequentUserScrollsAtLeastOncePerHours = 24; |
| 40 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72; |
| 41 |
| 36 const char kHistogramAverageHoursToOpenNTP[] = | 42 const char kHistogramAverageHoursToOpenNTP[] = |
| 37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; | 43 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; |
| 38 const char kHistogramAverageHoursToShowSuggestions[] = | 44 const char kHistogramAverageHoursToShowSuggestions[] = |
| 39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; | 45 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; |
| 40 const char kHistogramAverageHoursToUseSuggestions[] = | 46 const char kHistogramAverageHoursToUseSuggestions[] = |
| 41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; | 47 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; |
| 42 | 48 |
| 43 } // namespace | 49 // The list of all metrics, used for iterating over them. |
| 44 | 50 const UserClassifier::Metric kMetrics[] = { |
| 45 namespace ntp_snippets { | 51 UserClassifier::Metric::NTP_OPENED, |
| 46 | 52 UserClassifier::Metric::SUGGESTIONS_SHOWN, |
| 47 UserClassifier::UserClassifier(PrefService* pref_service) | 53 UserClassifier::Metric::SUGGESTIONS_USED}; |
| 48 : pref_service_(pref_service), | 54 |
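| 49 // Compute discount_rate_per_hour such that | 55 // Names of the prefs storing each metric's value (kMetricKeys) and its last-time timestamp (kLastTimeKeys), indexed by static_cast<int>(Metric). |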
| 50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. | 56 const char* kMetricKeys[] = { |
| 51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} | 57 prefs::kUserClassifierAverageNTPOpenedPerHour, |
| 52 | 58 prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| 53 UserClassifier::~UserClassifier() {} | 59 prefs::kUserClassifierAverageSuggestionsUsedPerHour}; |
| 54 | 60 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP, |
| 55 // static | 61 prefs::kUserClassifierLastTimeToShowSuggestions, |
| 56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { | 62 prefs::kUserClassifierLastTimeToUseSuggestions}; |
| 57 registry->RegisterDoublePref( | 63 |
| 58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); | 64 // Default estimated hours between events for new users, indexed by static_cast<int>(Metric). |
| 59 registry->RegisterDoublePref( | 65 const double kDefaults[] = {24, 36, 48}; |
| 60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); | 66 |
| 61 registry->RegisterDoublePref( | 67 static_assert(arraysize(kMetrics) == |
| 62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); | 68 static_cast<int>(UserClassifier::Metric::COUNT) && |
| 63 | 69 arraysize(kMetricKeys) == |
| 64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); | 70 static_cast<int>(UserClassifier::Metric::COUNT) && |
| 65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, | 71 arraysize(kLastTimeKeys) == |
| 66 0); | 72 static_cast<int>(UserClassifier::Metric::COUNT) && |
| 67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, | 73 arraysize(kDefaults) == |
| 68 0); | 74 static_cast<int>(UserClassifier::Metric::COUNT), |
| 69 } | 75 "Fill in info for all metrics."); |
| 70 | 76 |
| 71 void UserClassifier::OnNTPOpened() { | 77 // Returns the hourly discount rate derived from |kDiscountFactorPerDay|. |
| 72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, | 78 double GetDiscountRatePerHour() { |
| 73 prefs::kUserClassifierLastTimeToOpenNTP); | 79 // Compute discount_rate_per_hour such that |
| 74 | 80 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. |
| 75 double avg = GetEstimateHoursBetweenEvents( | 81 return std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0; |
| 76 prefs::kUserClassifierAverageNTPOpenedPerHour); | 82 } |
| 77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, | 83 |
| 78 kMaxHours, 50); | 84 // Returns the new value of the metric using its |old_value|, assuming |
| 79 } | 85 // |hours_since_last_time| hours have passed since it was last discounted. |
| 80 | 86 double DiscountMetric(double old_value, |
| 81 void UserClassifier::OnSuggestionsShown() { | 87 double hours_since_last_time, |
| 82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, | 88 double discount_rate_per_hour) { |
| 83 prefs::kUserClassifierLastTimeToShowSuggestions); | 89 // Compute the new discounted average according to the formula |
| 84 | 90 // avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events |
| 85 double avg = GetEstimateHoursBetweenEvents( | 91 return std::exp(-discount_rate_per_hour * hours_since_last_time) * old_value; |
| 86 prefs::kUserClassifierAverageSuggestionsShownPerHour); | 92 } |
| 87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, | 93 |
| 88 kMaxHours, 50); | 94 // Compute the number of hours between two events for the given metric value |
| 89 } | 95 // assuming the events were equally distributed. |
| 90 | 96 double GetEstimateHoursBetweenEvents(double metric_value, |
| 91 void UserClassifier::OnSuggestionsUsed() { | 97 double discount_rate_per_hour) { |
| 92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, | 98 // The computation below is well-defined only for |metric_value| > 1 (otherwise |
| 93 prefs::kUserClassifierLastTimeToUseSuggestions); | 99 // it would take the log of a negative value or divide by zero). As |metric_value| -> 1, |
| 94 | 100 // the estimate below -> infinity, so kMaxHours is the natural result here. |
| 95 double avg = GetEstimateHoursBetweenEvents( | 101 if (metric_value <= 1) |
| 96 prefs::kUserClassifierAverageSuggestionsUsedPerHour); | |
| 97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, | |
| 98 kMaxHours, 50); | |
| 99 } | |
| 100 | |
| 101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, | |
| 102 const char* last_time_pref_name) { | |
| 103 if (!pref_service_) | |
| 104 return; | |
| 105 | |
| 106 double hours_since_last_time = | |
| 107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); | |
| 108 // Ignore events within the same "browsing session". | |
| 109 if (hours_since_last_time < kMinHours) | |
| 110 return; | |
| 111 SetLastTimeToNow(last_time_pref_name); | |
| 112 | |
| 113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | |
| 114 // Compute and store the new discounted average according to the formula | |
| 115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. | |
| 116 double new_avg_events_per_hour = | |
| 117 1 + | |
| 118 std::exp(-discount_rate_per_hour_ * hours_since_last_time) * | |
| 119 avg_events_per_hour; | |
| 120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); | |
| 121 } | |
| 122 | |
| 123 double UserClassifier::GetEstimateHoursBetweenEvents( | |
| 124 const char* metric_pref_name) { | |
| 125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | |
| 126 | |
| 127 // Right after the first update, the metric is equal to 1. | |
| 128 if (avg_events_per_hour <= 1) | |
| 129 return kMaxHours; | 102 return kMaxHours; |
| 130 | 103 |
| 131 // This is the estimate with the assumption that last event happened right | 104 // This is the estimate with the assumption that last event happened right |
| 132 // now and the system is in the steady-state. Solve estimate_hours in the | 105 // now and the system is in the steady-state. Solve estimate_hours in the |
| 133 // steady-state equation: | 106 // steady-state equation: |
| 134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, | 107 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, |
| 135 // i.e. | 108 // i.e. |
| 136 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), | 109 // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value), |
| 137 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), | 110 // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)), |
| 138 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. | 111 // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate. |
| 139 return std::min(kMaxHours, | 112 double estimate_hours = |
| 140 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / | 113 std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour; |
| 141 discount_rate_per_hour_); | 114 return std::max(kMinHours, std::min(kMaxHours, estimate_hours)); |
| 142 } | 115 } |
| 143 | 116 |
| 144 double UserClassifier::GetHoursSinceLastTime( | 117 // The inverse of GetEstimateHoursBetweenEvents(). |
| 145 const char* last_time_pref_name) { | 118 double GetMetricValueForEstimateHoursBetweenEvents( |
| 146 if (!pref_service_->HasPrefPath(last_time_pref_name)) | 119 double estimate_hours, |
| 147 return DBL_MAX; | 120 double discount_rate_per_hour) { |
| 121 // Keep the input value within [kMinHours, kMaxHours]. |
| 122 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours)); |
| 123 |
| 124 // Return |metric_value| such that GetEstimateHoursBetweenEvents for |
| 125 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in |
| 126 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, |
| 127 // i.e. |
| 128 // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1, |
| 129 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}). |
| 130 return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours)); |
| 131 } |
| 132 |
| 133 } // namespace |
| 134 |
| 135 UserClassifier::UserClassifier(PrefService* pref_service) |
| 136 : pref_service_(pref_service), |
| 137 discount_rate_per_hour_(GetDiscountRatePerHour()) { |
| 138 // The pref_service_ can be null in tests. |
| 139 if (!pref_service_) |
| 140 return; |
| 141 |
| 142 // Initialize the prefs storing the last time: the counter has just started! |
| 143 for (const Metric metric : kMetrics) { |
| 144 if (!HasLastTime(metric)) |
| 145 SetLastTimeToNow(metric); |
| 146 } |
| 147 } |
| 148 |
| 149 UserClassifier::~UserClassifier() {} |
| 150 |
| 151 // static |
| 152 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { |
| 153 for (Metric metric : kMetrics) { |
| 154 double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents( |
| 155 kDefaults[static_cast<int>(metric)], GetDiscountRatePerHour()); |
| 156 registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)], |
| 157 default_metric_value); |
| 158 registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0); |
| 159 } |
| 160 } |
| 161 |
| 162 void UserClassifier::OnEvent(Metric metric) { |
| 163 DCHECK_NE(metric, Metric::COUNT); |
| 164 double metric_value = UpdateMetricOnEvent(metric); |
| 165 |
| 166 double avg = |
| 167 GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_); |
| 168 switch (metric) { |
| 169 case Metric::NTP_OPENED: |
| 170 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, |
| 171 kMaxHours, 50); |
| 172 break; |
| 173 case Metric::SUGGESTIONS_SHOWN: |
| 174 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, |
| 175 1, kMaxHours, 50); |
| 176 break; |
| 177 case Metric::SUGGESTIONS_USED: |
| 178 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, |
| 179 1, kMaxHours, 50); |
| 180 break; |
| 181 case Metric::COUNT: |
| 182 NOTREACHED(); |
| 183 break; |
| 184 } |
| 185 } |
| 186 |
| 187 double UserClassifier::GetEstimatedAvgTime(Metric metric) const { |
| 188 DCHECK_NE(metric, Metric::COUNT); |
| 189 double metric_value = GetUpToDateMetricValue(metric); |
| 190 return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_); |
| 191 } |
| 192 |
| 193 UserClassifier::UserClass UserClassifier::GetUserClass() const { |
| 194 if (GetEstimatedAvgTime(Metric::NTP_OPENED) >= |
| 195 kOccasionalUserOpensNTPAtMostOncePerHours) { |
| 196 return UserClass::RARE_NTP_USER; |
| 197 } |
| 198 |
| 199 if (GetEstimatedAvgTime(Metric::SUGGESTIONS_SHOWN) <= |
| 200 kFrequentUserScrollsAtLeastOncePerHours) { |
| 201 return UserClass::ACTIVE_SUGGESTIONS_CONSUMER; |
| 202 } |
| 203 |
| 204 return UserClass::ACTIVE_NTP_USER; |
| 205 } |
| 206 |
| 207 std::string UserClassifier::GetUserClassDescriptionForDebugging() const { |
| 208 switch (GetUserClass()) { |
| 209 case UserClass::RARE_NTP_USER: |
| 210 return "Rare user of the NTP"; |
| 211 case UserClass::ACTIVE_NTP_USER: |
| 212 return "Active user of the NTP"; |
| 213 case UserClass::ACTIVE_SUGGESTIONS_CONSUMER: |
| 214 return "Active consumer of NTP suggestions"; |
| 215 } |
| 216 NOTREACHED(); |
| 217 return std::string(); |
| 218 } |
| 219 |
| 220 void UserClassifier::ClearClassificationForDebugging() { |
| 221 // The pref_service_ can be null in tests. |
| 222 if (!pref_service_) |
| 223 return; |
| 224 |
| 225 for (const Metric& metric : kMetrics) { |
| 226 ClearMetricValue(metric); |
| 227 SetLastTimeToNow(metric); |
| 228 } |
| 229 } |
| 230 |
| 231 double UserClassifier::UpdateMetricOnEvent(Metric metric) { |
| 232 // The pref_service_ can be null in tests. |
| 233 if (!pref_service_) |
| 234 return 0; |
| 235 |
| 236 double hours_since_last_time = |
| 237 std::min(kMaxHours, GetHoursSinceLastTime(metric)); |
| 238 // Ignore events within the same "browsing session". |
| 239 if (hours_since_last_time < kMinHours) |
| 240 return GetUpToDateMetricValue(metric); |
| 241 |
| 242 SetLastTimeToNow(metric); |
| 243 |
| 244 double metric_value = GetMetricValue(metric); |
| 245 // Add 1 to the discounted metric as the event has happened right now. |
| 246 double new_metric_value = |
| 247 1 + DiscountMetric(metric_value, hours_since_last_time, |
| 248 discount_rate_per_hour_); |
| 249 SetMetricValue(metric, new_metric_value); |
| 250 return new_metric_value; |
| 251 } |
| 252 |
| 253 double UserClassifier::GetUpToDateMetricValue(Metric metric) const { |
| 254 // The pref_service_ can be null in tests. |
| 255 if (!pref_service_) |
| 256 return 0; |
| 257 |
| 258 double hours_since_last_time = |
| 259 std::min(kMaxHours, GetHoursSinceLastTime(metric)); |
| 260 |
| 261 double metric_value = GetMetricValue(metric); |
| 262 return DiscountMetric(metric_value, hours_since_last_time, |
| 263 discount_rate_per_hour_); |
| 264 } |
| 265 |
| 266 double UserClassifier::GetHoursSinceLastTime(Metric metric) const { |
| 267 if (!HasLastTime(metric)) |
| 268 return 0; |
| 148 | 269 |
| 149 base::TimeDelta since_last_time = | 270 base::TimeDelta since_last_time = |
| 150 base::Time::Now() - base::Time::FromInternalValue( | 271 base::Time::Now() - base::Time::FromInternalValue(pref_service_->GetInt64( |
| 151 pref_service_->GetInt64(last_time_pref_name)); | 272 kLastTimeKeys[static_cast<int>(metric)])); |
| 152 return since_last_time.InSecondsF() / 3600; | 273 return since_last_time.InSecondsF() / 3600; |
| 153 } | 274 } |
| 154 | 275 |
| 155 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { | 276 bool UserClassifier::HasLastTime(Metric metric) const { |
| 156 pref_service_->SetInt64(last_time_pref_name, | 277 return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(metric)]); |
| 278 } |
| 279 |
| 280 void UserClassifier::SetLastTimeToNow(Metric metric) { |
| 281 pref_service_->SetInt64(kLastTimeKeys[static_cast<int>(metric)], |
| 157 base::Time::Now().ToInternalValue()); | 282 base::Time::Now().ToInternalValue()); |
| 158 } | 283 } |
| 159 | 284 |
| 285 double UserClassifier::GetMetricValue(Metric metric) const { |
| 286 return pref_service_->GetDouble(kMetricKeys[static_cast<int>(metric)]); |
| 287 } |
| 288 |
| 289 void UserClassifier::SetMetricValue(Metric metric, double metric_value) { |
| 290 pref_service_->SetDouble(kMetricKeys[static_cast<int>(metric)], metric_value); |
| 291 } |
| 292 |
| 293 void UserClassifier::ClearMetricValue(Metric metric) { |
| 294 pref_service_->ClearPref(kMetricKeys[static_cast<int>(metric)]); |
| 295 } |
| 296 |
| 160 } // namespace ntp_snippets | 297 } // namespace ntp_snippets |
| OLD | NEW |