Chromium Code Reviews| Index: components/ntp_snippets/user_classifier.cc |
| diff --git a/components/ntp_snippets/user_classifier.cc b/components/ntp_snippets/user_classifier.cc |
| index df24ec962f800f5e9c391447525979f662f3c540..ed444a6397b683f76dad4258ab4f23541edfb1b0 100644 |
| --- a/components/ntp_snippets/user_classifier.cc |
| +++ b/components/ntp_snippets/user_classifier.cc |
| @@ -15,6 +15,8 @@ |
| #include "components/prefs/pref_registry_simple.h" |
| #include "components/prefs/pref_service.h" |
| +namespace ntp_snippets { |
| + |
| namespace { |
| // TODO(jkrcal): Make all of this configurable via variations_service. |
| @@ -33,6 +35,10 @@ const double kMaxHours = 7 * 24; |
| // do not count again). |
| const double kMinHours = 0.5; |
| +// Classification constants. |
| +const double kFrequentUserScrollsAtLeastOncePerHours = 24; |
| +const double kOccasionalUserOpensNTPAtMostOncePerHours = 72; |
| + |
| const char kHistogramAverageHoursToOpenNTP[] = |
| "NewTabPage.UserClassifier.AverageHoursToOpenNTP"; |
| const char kHistogramAverageHoursToShowSuggestions[] = |
| @@ -40,121 +46,260 @@ const char kHistogramAverageHoursToShowSuggestions[] = |
| const char kHistogramAverageHoursToUseSuggestions[] = |
| "NewTabPage.UserClassifier.AverageHoursToUseSuggestions"; |
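| +// Pref names, indexed by Metric: the per-metric discounted event averages |
| +// (kMetricKeys) and the times they were last updated (kLastTimeKeys). |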
| +const char* kMetricKeys[] = { |
| + prefs::kUserClassifierAverageNTPOpenedPerHour, |
| + prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| + prefs::kUserClassifierAverageSuggestionsUsedPerHour}; |
| +const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP, |
| + prefs::kUserClassifierLastTimeToShowSuggestions, |
| + prefs::kUserClassifierLastTimeToUseSuggestions}; |
| + |
| +// Default estimated hours between events for new users, indexed by Metric. |
| +const double kDefaults[] = {24, 36, 48}; |
|
Marc Treib
2016/09/20 10:27:03
optional: add some static_asserts to make sure the
jkrcal
2016/09/20 13:10:13
Done.
|
| + |
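| +// Returns the hourly discount rate derived from kDiscountFactorPerDay. The |
| +// value is computed on the first call and cached in a function-local static. |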
| +double |
| +GetDiscountRatePerHour() { |
|
Marc Treib
2016/09/20 10:27:03
extra line break
jkrcal
2016/09/20 13:10:13
Done.
|
| + static double discount_rate_per_hour = 0.0; |
| + |
| + if (discount_rate_per_hour == 0.0) { |
|
tschumann
2016/09/20 11:32:47
what's the purpose of this?
Do you want to initia
jkrcal
2016/09/20 13:10:13
It was initialization, I removed the static var, a
|
| + // Compute discount_rate_per_hour such that |
| + // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. |
| + discount_rate_per_hour = |
| + std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0; |
| + } |
| + |
| + return discount_rate_per_hour; |
| +} |
| + |
| +// Returns the new value of the metric using its |old_value|, assuming |
| +// |hours_since_last_time| hours have passed since it was last recomputed. |
| +// If |event_now| is true, the event is assumed to have happened right now, |
| +// otherwise no event is assumed to happen within the last |
| +// |hours_since_last_time| hours. |
| +double RecomputeMetric(double old_value, |
| + double hours_since_last_time, |
| + bool event_now) { |
| + // Return the new discounted value according to the formula |
| + // new_value := (event_now ? 1 : 0) + |
| + // e^{-discount_rate_per_hour * hours_since_last_time} * old_value. |
| + return (event_now ? 1 : 0) + |
| + std::exp(-GetDiscountRatePerHour() * hours_since_last_time) * |
| + old_value; |
| +} |
| + |
| +// Computes the estimated number of hours between two consecutive events for |
| +// the given metric value, assuming the events are evenly spaced in time. The |
| +// result is clamped to [kMinHours, kMaxHours]. |
| +double GetEstimateHoursBetweenEvents(const double metric_value) { |
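| + // Right after the first update, the metric is equal to 1. The steady-state |
| + // formula below divides by (metric_value - 1) and takes a logarithm, so it |
| + // is not usable for metric_value <= 1; report the longest interval instead. |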
| + if (metric_value <= 1) |
|
tschumann
2016/09/20 11:32:47
maybe adjust the comment to also explain why this
jkrcal
2016/09/20 13:10:13
Done.
|
| + return kMaxHours; |
| + |
| + // This is the estimate with the assumption that last event happened right |
| + // now and the system is in the steady-state. Solve estimate_hours in the |
| + // steady-state equation: |
| + // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, |
| + // i.e. |
| + // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value), |
| + // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)), |
| + // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate. |
| + double estimate_hours = |
| + std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour(); |
| + return std::max(kMinHours, std::min(kMaxHours, estimate_hours)); |
| +} |
| + |
| +// The inverse of GetEstimateHoursBetweenEvents() for inputs within |
| +// [kMinHours, kMaxHours]; inputs outside that range are clamped first. |
| +double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) { |
| + // Keep the input value within [kMinHours, kMaxHours]. |
| + estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours)); |
| + |
| + // Return |metric_value| such that GetEstimateHoursBetweenEvents for |
| + // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in |
| + // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value, |
| + // i.e. |
| + // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1, |
| + // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}). |
| + return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours)); |
| +} |
| + |
| } // namespace |
| -namespace ntp_snippets { |
| +// static |
| +const UserClassifier::Metric UserClassifier::kMetrics[3] = { |
|
Marc Treib
2016/09/20 10:27:03
Is the "3" required?
jkrcal
2016/09/20 13:10:13
Done.
|
| + Metric::OPEN_NTP, Metric::SHOW_SUGGESTIONS, Metric::USE_SUGGESTIONS}; |
| UserClassifier::UserClassifier(PrefService* pref_service) |
| - : pref_service_(pref_service), |
| - // Compute discount_rate_per_hour such that |
| - // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}. |
| - discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {} |
| + : pref_service_(pref_service) { |
| + // The pref_service_ can be null in tests. |
| + if (!pref_service_) |
| + return; |
| + |
| + // Initialize the prefs storing the last time: the counter has just started! |
| + for (const Metric metric : kMetrics) { |
| + if (!HasLastTime(metric)) |
| + SetLastTimeToNow(metric); |
| + } |
| +} |
| UserClassifier::~UserClassifier() {} |
| // static |
| void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { |
| - registry->RegisterDoublePref( |
| - prefs::kUserClassifierAverageNTPOpenedPerHour, 1); |
| - registry->RegisterDoublePref( |
| - prefs::kUserClassifierAverageSuggestionsShownPerHour, 1); |
| - registry->RegisterDoublePref( |
| - prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1); |
| - |
| - registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0); |
| - registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions, |
| - 0); |
| - registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions, |
| - 0); |
| + for (const Metric metric : kMetrics) { |
| + registry->RegisterDoublePref(kMetricKeys[metric], |
| + GetMetricValueForEstimateHoursBetweenEvents( |
| + kDefaults[metric])); |
| + registry->RegisterInt64Pref(kLastTimeKeys[metric], 0); |
| + } |
| } |
| void UserClassifier::OnNTPOpened() { |
| - UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour, |
| - prefs::kUserClassifierLastTimeToOpenNTP); |
| + double metric = UpdateMetricOnEvent(Metric::OPEN_NTP); |
| - double avg = GetEstimateHoursBetweenEvents( |
| - prefs::kUserClassifierAverageNTPOpenedPerHour); |
| + double avg = GetEstimateHoursBetweenEvents(metric); |
| UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1, |
| kMaxHours, 50); |
| } |
| void UserClassifier::OnSuggestionsShown() { |
| - UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour, |
| - prefs::kUserClassifierLastTimeToShowSuggestions); |
| + double metric = UpdateMetricOnEvent(Metric::SHOW_SUGGESTIONS); |
| - double avg = GetEstimateHoursBetweenEvents( |
| - prefs::kUserClassifierAverageSuggestionsShownPerHour); |
| + double avg = GetEstimateHoursBetweenEvents(metric); |
| UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1, |
| kMaxHours, 50); |
| } |
| void UserClassifier::OnSuggestionsUsed() { |
| - UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour, |
| - prefs::kUserClassifierLastTimeToUseSuggestions); |
| + double metric = UpdateMetricOnEvent(Metric::USE_SUGGESTIONS); |
| - double avg = GetEstimateHoursBetweenEvents( |
| - prefs::kUserClassifierAverageSuggestionsUsedPerHour); |
| + double avg = GetEstimateHoursBetweenEvents(metric); |
| UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1, |
| kMaxHours, 50); |
| } |
| -void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name, |
| - const char* last_time_pref_name) { |
| +double UserClassifier::GetEstimatedAvgTimeToOpenNTP() const { |
|
Marc Treib
2016/09/20 10:27:03
Are the three GetEstimatedAvgTimeTo... methods sti
jkrcal
2016/09/20 13:10:13
Done.
|
| + return GetEstimatedAvgTime(Metric::OPEN_NTP); |
| +} |
| + |
| +double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() const { |
| + return GetEstimatedAvgTime(Metric::SHOW_SUGGESTIONS); |
| +} |
| + |
| +double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() const { |
| + return GetEstimatedAvgTime(Metric::USE_SUGGESTIONS); |
| +} |
| + |
| +UserClassifier::UserClass UserClassifier::GetUserClass() const { |
| + if (GetEstimatedAvgTimeToOpenNTP() >= |
| + kOccasionalUserOpensNTPAtMostOncePerHours) { |
| + return UserClass::OCCASIONAL_NTP_USER; |
| + } |
| + |
| + if (GetEstimatedAvgTimeToUseSuggestions() <= |
| + kFrequentUserScrollsAtLeastOncePerHours) { |
| + return UserClass::FREQUENT_NTP_USER; |
| + } |
| + |
| + return UserClass::NORMAL_NTP_USER; |
| +} |
| + |
| +std::string UserClassifier::GetUserClassDescriptionForDebugging() const { |
| + switch (GetUserClass()) { |
| + case UserClass::OCCASIONAL_NTP_USER: |
| + return "Occasional user of the NTP"; |
| + case UserClass::NORMAL_NTP_USER: |
| + return "Normal user of the NTP"; |
| + case UserClass::FREQUENT_NTP_USER: |
| + return "Frequent user of the NTP"; |
| + } |
| + NOTREACHED(); |
| + return "Unknown user class"; |
| +} |
| + |
| +void UserClassifier::ClearClassificationForDebugging() { |
| + // The pref_service_ can be null in tests. |
| if (!pref_service_) |
| return; |
| + for (const Metric& metric : kMetrics) { |
| + ClearMetricValue(metric); |
| + SetLastTimeToNow(metric); |
| + } |
| +} |
| + |
| +double UserClassifier::GetEstimatedAvgTime(Metric metric) const { |
| + double metric_value = GetUpToDateMetricValue(metric); |
| + return GetEstimateHoursBetweenEvents(metric_value); |
| +} |
| + |
| +double UserClassifier::UpdateMetricOnEvent(Metric metric) { |
| + // The pref_service_ can be null in tests. |
| + if (!pref_service_) |
| + return 0; |
| + |
| double hours_since_last_time = |
| - std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name)); |
| + std::min(kMaxHours, GetHoursSinceLastTime(metric)); |
| // Ignore events within the same "browsing session". |
| if (hours_since_last_time < kMinHours) |
| - return; |
| - SetLastTimeToNow(last_time_pref_name); |
| + return GetUpToDateMetricValue(metric); |
| - double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); |
| - // Compute and store the new discounted average according to the formula |
| - // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events. |
| - double new_avg_events_per_hour = |
| - 1 + |
| - std::exp(-discount_rate_per_hour_ * hours_since_last_time) * |
| - avg_events_per_hour; |
| - pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour); |
| + SetLastTimeToNow(metric); |
| + |
| + double metric_value = GetMetricValue(metric); |
| + double new_metric_value = RecomputeMetric(metric_value, hours_since_last_time, |
| + true /* event_now */); |
|
Marc Treib
2016/09/20 10:27:03
nit: Preferred style is /*event_now=*/true
jkrcal
2016/09/20 13:10:13
I recently had a CL discussion with Bernhard:
-
Marc Treib
2016/09/20 13:26:52
Tim suggested the above format, because for intern
jkrcal
2016/09/20 13:46:39
Done.
Bernhard Bauer
2016/09/20 15:59:54
Weeeelll… If that tool needs to be written / porte
|
| + SetMetricValue(metric, new_metric_value); |
| + return new_metric_value; |
| } |
| -double UserClassifier::GetEstimateHoursBetweenEvents( |
| - const char* metric_pref_name) { |
| - double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name); |
| +double UserClassifier::GetUpToDateMetricValue(Metric metric) const { |
| + // The pref_service_ can be null in tests. |
| + if (!pref_service_) |
| + return 0; |
| - // Right after the first update, the metric is equal to 1. |
| - if (avg_events_per_hour <= 1) |
| - return kMaxHours; |
| + double hours_since_last_time = |
| + std::min(kMaxHours, GetHoursSinceLastTime(metric)); |
| - // This is the estimate with the assumption that last event happened right |
| - // now and the system is in the steady-state. Solve estimate_hours in the |
| - // steady-state equation: |
| - // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events, |
| - // i.e. |
| - // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events), |
| - // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)), |
| - // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate. |
| - return std::min(kMaxHours, |
| - std::log(avg_events_per_hour / (avg_events_per_hour - 1)) / |
| - discount_rate_per_hour_); |
| + double metric_value = GetMetricValue(metric); |
| + return RecomputeMetric(metric_value, hours_since_last_time, |
| + false /* event_now */); |
| } |
| -double UserClassifier::GetHoursSinceLastTime( |
| - const char* last_time_pref_name) { |
| - if (!pref_service_->HasPrefPath(last_time_pref_name)) |
| - return DBL_MAX; |
| +double UserClassifier::GetHoursSinceLastTime(Metric metric) const { |
| + DCHECK(pref_service_); |
| + if (!HasLastTime(metric)) |
| + return 0; |
| base::TimeDelta since_last_time = |
| base::Time::Now() - base::Time::FromInternalValue( |
| - pref_service_->GetInt64(last_time_pref_name)); |
| + pref_service_->GetInt64(kLastTimeKeys[metric])); |
| return since_last_time.InSecondsF() / 3600; |
| } |
| -void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) { |
| - pref_service_->SetInt64(last_time_pref_name, |
| +bool UserClassifier::HasLastTime(const Metric metric) const { |
| + DCHECK(pref_service_); |
|
Marc Treib
2016/09/20 10:27:03
These DCHECKs aren't really helpful: If it were nu
jkrcal
2016/09/20 13:10:13
Done.
|
| + return pref_service_->HasPrefPath(kLastTimeKeys[metric]); |
| +} |
| + |
| +void UserClassifier::SetLastTimeToNow(Metric metric) { |
| + DCHECK(pref_service_); |
| + pref_service_->SetInt64(kLastTimeKeys[metric], |
| base::Time::Now().ToInternalValue()); |
| } |
| +double UserClassifier::GetMetricValue(const Metric metric) const { |
| + DCHECK(pref_service_); |
| + return pref_service_->GetDouble(kMetricKeys[metric]); |
| +} |
| + |
| +void UserClassifier::SetMetricValue(const Metric metric, double metric_value) { |
| + DCHECK(pref_service_); |
| + pref_service_->SetDouble(kMetricKeys[metric], metric_value); |
| +} |
| + |
| +void UserClassifier::ClearMetricValue(const Metric metric) { |
| + DCHECK(pref_service_); |
| + pref_service_->ClearPref(kMetricKeys[metric]); |
| +} |
| + |
| } // namespace ntp_snippets |