Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/ntp_snippets/user_classifier.h" | 5 #include "components/ntp_snippets/user_classifier.h" |
| 6 | 6 |
| 7 #include <float.h> | 7 #include <float.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <string> | 10 #include <string> |
| (...skipping 15 matching lines...) | |
| 26 // Never consider any larger interval than this (so that extreme situations such | 26 // Never consider any larger interval than this (so that extreme situations such |
| 27 // as losing your phone or going for a long offline vacation do not skew the | 27 // as losing your phone or going for a long offline vacation do not skew the |
| 28 // average too much). | 28 // average too much). |
| 29 const double kMaxHours = 7 * 24; | 29 const double kMaxHours = 7 * 24; |
| 30 | 30 |
| 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is | 31 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is |
| 32 // the length of the browsing session where subsequent events of the same type | 32 // the length of the browsing session where subsequent events of the same type |
| 33 // do not count again). | 33 // do not count again). |
| 34 const double kMinHours = 0.5; | 34 const double kMinHours = 0.5; |
| 35 | 35 |
| 36 // Classification constants. | |
| 37 const double kFrequentUserScrollsAtLeastOncePerHours = 24; | |
| 38 const double kOccasionalUserOpensNTPAtMostOncePerHours = 72; | |
| 39 | |
| 40 // Default lengths of the intervals for new users. | |
| 41 const double kNTPFrequencyOfANewUserInHours = 24; | |
| 42 const double kShowFrequencyOfANewUserInHours = 36; | |
| 43 const double kUseFrequencyOfANewUserInHours = 48; | |
| 44 | |
| 36 const char kHistogramAverageHoursToOpenNTP[] = | 45 const char kHistogramAverageHoursToOpenNTP[] = |
| 37 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; | 46 "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; |
| 38 const char kHistogramAverageHoursToShowSuggestions[] = | 47 const char kHistogramAverageHoursToShowSuggestions[] = |
| 39 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; | 48 "NewTabPage.UserClassifier.AverageHoursToShowSuggestions"; |
| 40 const char kHistogramAverageHoursToUseSuggestions[] = | 49 const char kHistogramAverageHoursToUseSuggestions[] = |
| 41 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; | 50 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; |
| 42 | 51 |
| 52 // Computes the discount rate. | |
| 53 double GetDiscountRatePerHour() { | |
| 54 static double discount_rate_per_hour = 0.0; | |
| 55 | |
| 56 if (discount_rate_per_hour == 0.0) { | |
| 57 // Compute discount_rate_per_hour such that | |
| 58 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. | |
| 59 discount_rate_per_hour = | |
| 60 std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0; | |
| 61 } | |
| 62 | |
| 63 return discount_rate_per_hour; | |
| 64 } | |
| 65 | |
| 66 // Returns the new value of the metric using its |old_value|, assuming | |
| 67 // |hours_since_last_time| hours have passed since it was last recomputed. | |
| 68 // If |event_now| is true, the event is assumed to have happened right now, | |
| 69 // otherwise no event is assumed to happen within the last | |
| 70 // |hours_since_last_time| hours. | |
| 71 double RecomputeMetric(double old_value, | |
| 72 double hours_since_last_time, | |
| 73 bool event_now) { | |
| 74 // Compute and store the new discounted average according to the formula | |
| 75 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. | |
| 76 return (event_now ? 1 : 0) + | |
| 77 std::exp(-GetDiscountRatePerHour() * hours_since_last_time) * | |
| 78 old_value; | |
| 79 } | |
| 80 | |
| 81 // Compute the number of hours between two events for the given metric value | |
| 82 // assuming the events were equally distributed. | |
| 83 double GetEstimateHoursBetweenEvents(const double metric_value) { | |
| 84 // Right after the first update, the metric is equal to 1. | |
| 85 if (metric_value <= 1) | |
| 86 return kMaxHours; | |
| 87 | |
| 88 // This is the estimate with the assumption that last event happened right | |
| 89 // now and the system is in the steady-state. Solve estimate_hours in the | |
| 90 // steady-state equation: | |
| 91 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, | |
| 92 // i.e. | |
| 93 // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), | |
| 94 // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), | |
| 95 // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. | |
| 96 double estimate_hours = | |
| 97 std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour(); | |
| 98 return std::max(kMinHours, std::min(kMaxHours, estimate_hours)); | |
| 99 } | |
| 100 | |
| 101 // The inverse of GetEstimateHoursBetweenEvents(). | |
| 102 double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) { | |
| 103 // Keep the input value within [kMinHours, kMaxHours]. | |
| 104 estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours)); | |
| 105 | |
| 106 // Return |metric_value| such that GetEstimateHoursBetweenEvents for | |
| 107 // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in | |
| 108 // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, | |
| 109 // i.e. | |
| 110 // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}). | |
| 111 return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours)); | |
| 112 } | |
| 113 | |
| 43 } // namespace | 114 } // namespace |
| 44 | 115 |
| 45 namespace ntp_snippets { | 116 namespace ntp_snippets { |
| 46 | 117 |
| 47 UserClassifier::UserClassifier(PrefService* pref_service) | 118 UserClassifier::UserClassifier(PrefService* pref_service) |
| 48 : pref_service_(pref_service), | 119 : pref_service_(pref_service) {} |
| 49 // Compute discount_rate_per_hour such that | |
| 50 // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. | |
| 51 discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} | |
| 52 | 120 |
| 53 UserClassifier::~UserClassifier() {} | 121 UserClassifier::~UserClassifier() {} |
| 54 | 122 |
| 55 // static | 123 // static |
| 56 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { | 124 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { |
| 125 registry->RegisterDoublePref(prefs::kUserClassifierAverageNTPOpenedPerHour, | |
| 126 GetMetricValueForEstimateHoursBetweenEvents( | |
| 127 kNTPFrequencyOfANewUserInHours)); | |
| 57 registry->RegisterDoublePref( | 128 registry->RegisterDoublePref( |
| 58 prefs::kUserClassifierAverageNTPOpenedPerHour, 1); | 129 prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| 130 GetMetricValueForEstimateHoursBetweenEvents( | |
| 131 kShowFrequencyOfANewUserInHours)); | |
| 59 registry->RegisterDoublePref( | 132 registry->RegisterDoublePref( |
| 60 prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); | 133 prefs::kUserClassifierAverageSuggestionsUsedPerHour, |
| 61 registry->RegisterDoublePref( | 134 GetMetricValueForEstimateHoursBetweenEvents( |
| 62 prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); | 135 kUseFrequencyOfANewUserInHours)); |
| 63 | 136 |
| 64 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); | 137 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); |
| 65 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, | 138 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, |
| 66 0); | 139 0); |
| 67 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, | 140 registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, |
| 68 0); | 141 0); |
| 69 } | 142 } |
| 70 | 143 |
| 71 void UserClassifier::OnNTPOpened() { | 144 void UserClassifier::OnNTPOpened() { |
| 72 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, | 145 double metric = |
| 73 prefs::kUserClassifierLastTimeToOpenNTP); | 146 UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, |
| 147 prefs::kUserClassifierLastTimeToOpenNTP); | |
| 74 | 148 |
| 75 double avg = GetEstimateHoursBetweenEvents( | 149 double avg = GetEstimateHoursBetweenEvents(metric); |
| 76 prefs::kUserClassifierAverageNTPOpenedPerHour); | |
| 77 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, | 150 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, |
| 78 kMaxHours, 50); | 151 kMaxHours, 50); |
| 79 } | 152 } |
| 80 | 153 |
| 81 void UserClassifier::OnSuggestionsShown() { | 154 void UserClassifier::OnSuggestionsShown() { |
| 82 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, | 155 double metric = |
| 83 prefs::kUserClassifierLastTimeToShowSuggestions); | 156 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| 157 prefs::kUserClassifierLastTimeToShowSuggestions); | |
| 84 | 158 |
| 85 double avg = GetEstimateHoursBetweenEvents( | 159 double avg = GetEstimateHoursBetweenEvents(metric); |
| 86 prefs::kUserClassifierAverageSuggestionsShownPerHour); | |
| 87 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, | 160 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, |
| 88 kMaxHours, 50); | 161 kMaxHours, 50); |
| 89 } | 162 } |
| 90 | 163 |
| 91 void UserClassifier::OnSuggestionsUsed() { | 164 void UserClassifier::OnSuggestionsUsed() { |
| 92 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, | 165 double metric = |
| 93 prefs::kUserClassifierLastTimeToUseSuggestions); | 166 UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, |
| 167 prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 94 | 168 |
| 95 double avg = GetEstimateHoursBetweenEvents( | 169 double avg = GetEstimateHoursBetweenEvents(metric); |
| 96 prefs::kUserClassifierAverageSuggestionsUsedPerHour); | |
| 97 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, | 170 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, |
| 98 kMaxHours, 50); | 171 kMaxHours, 50); |
| 99 } | 172 } |
| 100 | 173 |
| 101 void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, | 174 double UserClassifier::GetEstimatedAvgTimeToOpenNTP() const { |
| 102 const char* last_time_pref_name) { | 175 double metric = |
| 176 GetUpToDateMetricValue(prefs::kUserClassifierAverageNTPOpenedPerHour, | |
| 177 prefs::kUserClassifierLastTimeToOpenNTP); | |
| 178 return GetEstimateHoursBetweenEvents(metric); | |
| 179 } | |
| 180 | |
| 181 double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() const { | |
| 182 double metric = GetUpToDateMetricValue( | |
| 183 prefs::kUserClassifierAverageSuggestionsShownPerHour, | |
| 184 prefs::kUserClassifierLastTimeToShowSuggestions); | |
| 185 return GetEstimateHoursBetweenEvents(metric); | |
| 186 } | |
| 187 | |
| 188 double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() const { | |
| 189 double metric = GetUpToDateMetricValue( | |
| 190 prefs::kUserClassifierAverageSuggestionsUsedPerHour, | |
| 191 prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 192 return GetEstimateHoursBetweenEvents(metric); | |
| 193 } | |
| 194 | |
| 195 UserClassifier::UserClass UserClassifier::GetUserClass() const { | |
| 196 if (GetEstimatedAvgTimeToOpenNTP() >= | |
| 197 kOccasionalUserOpensNTPAtMostOncePerHours) { | |
| 198 return UserClass::OCCASIONAL_NTP_USER; | |
| 199 } | |
| 200 | |
| 201 if (GetEstimatedAvgTimeToUseSuggestions() <= | |
| 202 kFrequentUserScrollsAtLeastOncePerHours) { | |
| 203 return UserClass::FREQUENT_NTP_USER; | |
| 204 } | |
| 205 | |
| 206 return UserClass::NORMAL_NTP_USER; | |
| 207 } | |
| 208 | |
| 209 std::string UserClassifier::GetUserClassDescriptionForDebugging() const { | |
| 210 switch (GetUserClass()) { | |
| 211 case UserClass::OCCASIONAL_NTP_USER: | |
| 212 return "Occasional user of the NTP"; | |
| 213 case UserClass::NORMAL_NTP_USER: | |
| 214 return "Normal user of the NTP"; | |
| 215 case UserClass::FREQUENT_NTP_USER: | |
| 216 return "Frequent user of the NTP"; | |
| 217 } | |
| 218 NOTREACHED(); | |
| 219 return "Unknown user class"; | |
| 220 } | |
| 221 | |
| 222 void UserClassifier::ClearClassificationForDebugging() { | |
| 223 pref_service_->ClearPref(prefs::kUserClassifierAverageNTPOpenedPerHour); | |
| 224 pref_service_->ClearPref( | |
| 225 prefs::kUserClassifierAverageSuggestionsShownPerHour); | |
| 226 pref_service_->ClearPref(prefs::kUserClassifierAverageSuggestionsUsedPerHour); | |
| 227 | |
| 228 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToOpenNTP); | |
| 229 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToShowSuggestions); | |
| 230 pref_service_->ClearPref(prefs::kUserClassifierLastTimeToUseSuggestions); | |
| 231 } | |
| 232 | |
| 233 double UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, | |
| 234 const char* last_time_pref_name) { | |
| 235 // The pref_service_ can be null in tests. | |
| 103 if (!pref_service_) | 236 if (!pref_service_) |
| 104 return; | 237 return 0; |
| 105 | 238 |
| 106 double hours_since_last_time = | 239 double hours_since_last_time = |
| 107 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); | 240 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); |
| 241 // If the "last time" is not defined, set it. | |
| 242 if (!hours_since_last_time) | |
|
Marc Treib
2016/09/20 10:27:02
This will check for zero - is that what you want?
jkrcal
2016/09/20 13:10:13
I agree, this is a bit obscure, to say the least :) N
| |
| 243 SetLastTimeToNow(last_time_pref_name); | |
| 108 // Ignore events within the same "browsing session". | 244 // Ignore events within the same "browsing session". |
| 109 if (hours_since_last_time < kMinHours) | 245 if (hours_since_last_time < kMinHours) |
| 110 return; | 246 return GetUpToDateMetricValue(metric_pref_name, last_time_pref_name); |
| 247 | |
| 111 SetLastTimeToNow(last_time_pref_name); | 248 SetLastTimeToNow(last_time_pref_name); |
| 112 | 249 |
| 113 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | 250 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); |
| 114 // Compute and store the new discounted average according to the formula | |
| 115 // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. | |
| 116 double new_avg_events_per_hour = | 251 double new_avg_events_per_hour = |
| 117 1 + | 252 RecomputeMetric(avg_events_per_hour, hours_since_last_time, true); |
| 118 std::exp(discount_rate_per_hour_ * hours_since_last_time) * | |
| 119 avg_events_per_hour; | |
| 120 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); | 253 pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); |
| 254 return new_avg_events_per_hour; | |
| 121 } | 255 } |
| 122 | 256 |
| 123 double UserClassifier::GetEstimateHoursBetweenEvents( | 257 double UserClassifier::GetUpToDateMetricValue( |
| 124 const char* metric_pref_name) { | 258 const char* metric_pref_name, |
| 259 const char* last_time_pref_name) const { | |
| 260 // The pref_service_ can be null in tests. | |
| 261 if (!pref_service_) | |
| 262 return 0; | |
| 263 | |
| 264 double hours_since_last_time = | |
| 265 std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); | |
| 266 | |
| 125 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); | 267 double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); |
| 126 | 268 return RecomputeMetric(avg_events_per_hour, hours_since_last_time, true); |
| 127 // Right after the first update, the metric is equal to 1. | |
| 128 if (avg_events_per_hour <= 1) | |
| 129 return kMaxHours; | |
| 130 | |
| 131 // This is the estimate with the assumption that last event happened right | |
| 132 // now and the system is in the steady-state. Solve estimate_hours in the | |
| 133 // steady-state equation: | |
| 134 // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events. | |
| 135 return std::min(kMaxHours, | |
| 136 std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / | |
| 137 discount_rate_per_hour_); | |
| 138 } | 269 } |
| 139 | 270 |
| 140 double UserClassifier::GetHoursSinceLastTime( | 271 double UserClassifier::GetHoursSinceLastTime( |
| 141 const char* last_time_pref_name) { | 272 const char* last_time_pref_name) const { |
| 142 if (!pref_service_->HasPrefPath(last_time_pref_name)) | 273 if (!pref_service_->HasPrefPath(last_time_pref_name)) |
| 143 return DBL_MAX; | 274 return 0; |
| 144 | 275 |
| 145 base::TimeDelta since_last_time = | 276 base::TimeDelta since_last_time = |
| 146 base::Time::Now() - base::Time::FromInternalValue( | 277 base::Time::Now() - base::Time::FromInternalValue( |
| 147 pref_service_->GetInt64(last_time_pref_name)); | 278 pref_service_->GetInt64(last_time_pref_name)); |
| 148 return since_last_time.InSecondsF() / 3600; | 279 return since_last_time.InSecondsF() / 3600; |
| 149 } | 280 } |
| 150 | 281 |
| 151 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { | 282 void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { |
| 152 pref_service_->SetInt64(last_time_pref_name, | 283 pref_service_->SetInt64(last_time_pref_name, |
| 153 base::Time::Now().ToInternalValue()); | 284 base::Time::Now().ToInternalValue()); |
| 154 } | 285 } |
| 155 | 286 |
| 156 } // namespace ntp_snippets | 287 } // namespace ntp_snippets |
| OLD | NEW |