Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1103)

Unified Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Minor rebase & larger refactoring Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/ntp_snippets/user_classifier.cc
diff --git a/components/ntp_snippets/user_classifier.cc b/components/ntp_snippets/user_classifier.cc
index df24ec962f800f5e9c391447525979f662f3c540..ed444a6397b683f76dad4258ab4f23541edfb1b0 100644
--- a/components/ntp_snippets/user_classifier.cc
+++ b/components/ntp_snippets/user_classifier.cc
@@ -15,6 +15,8 @@
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
+namespace ntp_snippets {
+
namespace {
// TODO(jkrcal): Make all of this configurable via variations_service.
@@ -33,6 +35,10 @@ const double kMaxHours = 7 * 24;
// do not count again).
const double kMinHours = 0.5;
+// Classification constants.
+const double kFrequentUserScrollsAtLeastOncePerHours = 24;
+const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
+
const char kHistogramAverageHoursToOpenNTP[] =
"NewTabPage.UserClassifier.AverageHoursToOpenNTP";
const char kHistogramAverageHoursToShowSuggestions[] =
@@ -40,121 +46,260 @@ const char kHistogramAverageHoursToShowSuggestions[] =
const char kHistogramAverageHoursToUseSuggestions[] =
"NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
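+// Names of the prefs that store, for each metric, its current value
+// (kMetricKeys) and the time of its last event (kLastTimeKeys). Both arrays are
+// indexed by UserClassifier::Metric.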
+const char* kMetricKeys[] = {
+ prefs::kUserClassifierAverageNTPOpenedPerHour,
+ prefs::kUserClassifierAverageSuggestionsShownPerHour,
+ prefs::kUserClassifierAverageSuggestionsUsedPerHour};
+const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
+ prefs::kUserClassifierLastTimeToShowSuggestions,
+ prefs::kUserClassifierLastTimeToUseSuggestions};
+
+// Default estimated number of hours between two events for new users, one
+// entry per metric (indexed the same way as kMetricKeys).
+const double kDefaults[] = {24, 36, 48};
Marc Treib 2016/09/20 10:27:03 optional: add some static_asserts to make sure the
jkrcal 2016/09/20 13:10:13 Done.
+
+// Computes the discount rate.
+double
+GetDiscountRatePerHour() {
Marc Treib 2016/09/20 10:27:03 extra line break
jkrcal 2016/09/20 13:10:13 Done.
+ static double discount_rate_per_hour = 0.0;
+
+ if (discount_rate_per_hour == 0.0) {
tschumann 2016/09/20 11:32:47 what's the purpose of this? Do you want to initia
jkrcal 2016/09/20 13:10:13 It was initialization, I removed the static var, a
+ // Compute discount_rate_per_hour such that
+ // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
+ discount_rate_per_hour =
+ std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
+ }
+
+ return discount_rate_per_hour;
+}
+
+// Returns the new value of the metric using its |old_value|, assuming
+// |hours_since_last_time| hours have passed since it was last recomputed.
+// If |event_now| is true, the event is assumed to have happened right now,
+// otherwise no event is assumed to happen within the last
+// |hours_since_last_time| hours.
+double RecomputeMetric(double old_value,
+ double hours_since_last_time,
+ bool event_now) {
+ // Compute the new discounted average according to the formula
+ // avg_events := event + e^{-discount_rate_per_hour * hours_since} * avg_events,
+ // where event is 1 if |event_now| is true and 0 otherwise.
+ return (event_now ? 1 : 0) +
+ std::exp(-GetDiscountRatePerHour() * hours_since_last_time) *
+ old_value;
+}
+
+// Compute the number of hours between two events for the given metric value
+// assuming the events were equally distributed.
+double GetEstimateHoursBetweenEvents(const double metric_value) {
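+ // Right after the first update, the metric is equal to 1. The estimate below
+ // is only defined for metric_value > 1 (it takes the logarithm of
+ // metric_value / (metric_value - 1)), so return the maximum for other values.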
+ if (metric_value <= 1)
tschumann 2016/09/20 11:32:47 maybe adjust the comment to also explain why this
jkrcal 2016/09/20 13:10:13 Done.
+ return kMaxHours;
+
+ // This is the estimate with the assumption that last event happened right
+ // now and the system is in the steady-state. Solve estimate_hours in the
+ // steady-state equation:
+ // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
+ // i.e.
+ // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
+ // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
+ // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
+ double estimate_hours =
+ std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour();
+ return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
+}
+
+// The inverse of GetEstimateHoursBetweenEvents().
+double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) {
+ // Keep the input value within [kMinHours, kMaxHours].
+ estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
+
+ // Return |metric_value| such that GetEstimateHoursBetweenEvents for
+ // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
+ // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
+ // i.e.
+ // metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
+ // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
+ return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours));
+}
+
} // namespace
-namespace ntp_snippets {
+// static
+const UserClassifier::Metric UserClassifier::kMetrics[3] = {
Marc Treib 2016/09/20 10:27:03 Is the "3" required?
jkrcal 2016/09/20 13:10:13 Done.
+ Metric::OPEN_NTP, Metric::SHOW_SUGGESTIONS, Metric::USE_SUGGESTIONS};
UserClassifier::UserClassifier(PrefService* pref_service)
- : pref_service_(pref_service),
- // Compute discount_rate_per_hour such that
- // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
- discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {}
+ : pref_service_(pref_service) {
+ // The pref_service_ can be null in tests.
+ if (!pref_service_)
+ return;
+
+ // Initialize the prefs storing the last time: the counter has just started!
+ for (const Metric metric : kMetrics) {
+ if (!HasLastTime(metric))
+ SetLastTimeToNow(metric);
+ }
+}
UserClassifier::~UserClassifier() {}
// static
void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
- registry->RegisterDoublePref(
- prefs::kUserClassifierAverageNTPOpenedPerHour, 1);
- registry->RegisterDoublePref(
- prefs::kUserClassifierAverageSuggestionsShownPerHour, 1);
- registry->RegisterDoublePref(
- prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1);
-
- registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0);
- registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions,
- 0);
- registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToUseSuggestions,
- 0);
+ for (const Metric metric : kMetrics) {
+ registry->RegisterDoublePref(kMetricKeys[metric],
+ GetMetricValueForEstimateHoursBetweenEvents(
+ kDefaults[metric]));
+ registry->RegisterInt64Pref(kLastTimeKeys[metric], 0);
+ }
}
void UserClassifier::OnNTPOpened() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
- prefs::kUserClassifierLastTimeToOpenNTP);
+ double metric = UpdateMetricOnEvent(Metric::OPEN_NTP);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageNTPOpenedPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
kMaxHours, 50);
}
void UserClassifier::OnSuggestionsShown() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
- prefs::kUserClassifierLastTimeToShowSuggestions);
+ double metric = UpdateMetricOnEvent(Metric::SHOW_SUGGESTIONS);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageSuggestionsShownPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
kMaxHours, 50);
}
void UserClassifier::OnSuggestionsUsed() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
- prefs::kUserClassifierLastTimeToUseSuggestions);
+ double metric = UpdateMetricOnEvent(Metric::USE_SUGGESTIONS);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageSuggestionsUsedPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
kMaxHours, 50);
}
-void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
- const char* last_time_pref_name) {
+double UserClassifier::GetEstimatedAvgTimeToOpenNTP() const {
Marc Treib 2016/09/20 10:27:03 Are the three GetEstimatedAvgTimeTo... methods sti
jkrcal 2016/09/20 13:10:13 Done.
+ return GetEstimatedAvgTime(Metric::OPEN_NTP);
+}
+
+double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() const {
+ return GetEstimatedAvgTime(Metric::SHOW_SUGGESTIONS);
+}
+
+double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() const {
+ return GetEstimatedAvgTime(Metric::USE_SUGGESTIONS);
+}
+
+UserClassifier::UserClass UserClassifier::GetUserClass() const {
+ if (GetEstimatedAvgTimeToOpenNTP() >=
+ kOccasionalUserOpensNTPAtMostOncePerHours) {
+ return UserClass::OCCASIONAL_NTP_USER;
+ }
+
+ if (GetEstimatedAvgTimeToUseSuggestions() <=
+ kFrequentUserScrollsAtLeastOncePerHours) {
+ return UserClass::FREQUENT_NTP_USER;
+ }
+
+ return UserClass::NORMAL_NTP_USER;
+}
+
+std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
+ switch (GetUserClass()) {
+ case UserClass::OCCASIONAL_NTP_USER:
+ return "Occasional user of the NTP";
+ case UserClass::NORMAL_NTP_USER:
+ return "Normal user of the NTP";
+ case UserClass::FREQUENT_NTP_USER:
+ return "Frequent user of the NTP";
+ }
+ NOTREACHED();
+ return "Unknown user class";
+}
+
+void UserClassifier::ClearClassificationForDebugging() {
+ // The pref_service_ can be null in tests.
if (!pref_service_)
return;
+ for (const Metric& metric : kMetrics) {
+ ClearMetricValue(metric);
+ SetLastTimeToNow(metric);
+ }
+}
+
+double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
+ double metric_value = GetUpToDateMetricValue(metric);
+ return GetEstimateHoursBetweenEvents(metric_value);
+}
+
+double UserClassifier::UpdateMetricOnEvent(Metric metric) {
+ // The pref_service_ can be null in tests.
+ if (!pref_service_)
+ return 0;
+
double hours_since_last_time =
- std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
+ std::min(kMaxHours, GetHoursSinceLastTime(metric));
// Ignore events within the same "browsing session".
if (hours_since_last_time < kMinHours)
- return;
- SetLastTimeToNow(last_time_pref_name);
+ return GetUpToDateMetricValue(metric);
- double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
- // Compute and store the new discounted average according to the formula
- // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
- double new_avg_events_per_hour =
- 1 +
- std::exp(-discount_rate_per_hour_ * hours_since_last_time) *
- avg_events_per_hour;
- pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour);
+ SetLastTimeToNow(metric);
+
+ double metric_value = GetMetricValue(metric);
+ double new_metric_value = RecomputeMetric(metric_value, hours_since_last_time,
+ true /* event_now */);
Marc Treib 2016/09/20 10:27:03 nit: Preferred style is /*event_now=*/true
jkrcal 2016/09/20 13:10:13 I recently had a CL discussion with Bernhard: -
Marc Treib 2016/09/20 13:26:52 Tim suggested the above format, because for intern
jkrcal 2016/09/20 13:46:39 Done.
Bernhard Bauer 2016/09/20 15:59:54 Weeeelll… If that tool needs to be written / porte
+ SetMetricValue(metric, new_metric_value);
+ return new_metric_value;
}
-double UserClassifier::GetEstimateHoursBetweenEvents(
- const char* metric_pref_name) {
- double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
+double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
+ // The pref_service_ can be null in tests.
+ if (!pref_service_)
+ return 0;
- // Right after the first update, the metric is equal to 1.
- if (avg_events_per_hour <= 1)
- return kMaxHours;
+ double hours_since_last_time =
+ std::min(kMaxHours, GetHoursSinceLastTime(metric));
- // This is the estimate with the assumption that last event happened right
- // now and the system is in the steady-state. Solve estimate_hours in the
- // steady-state equation:
- // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events,
- // i.e.
- // -discount_rate * estimate_hours = log((avg_events - 1) / avg_events),
- // discount_rate * estimate_hours = log(avg_events / (avg_events - 1)),
- // estimate_hours = log(avg_events / (avg_events - 1)) / discount_rate.
- return std::min(kMaxHours,
- std::log(avg_events_per_hour / (avg_events_per_hour - 1)) /
- discount_rate_per_hour_);
+ double metric_value = GetMetricValue(metric);
+ return RecomputeMetric(metric_value, hours_since_last_time,
+ false /* event_now */);
}
-double UserClassifier::GetHoursSinceLastTime(
- const char* last_time_pref_name) {
- if (!pref_service_->HasPrefPath(last_time_pref_name))
- return DBL_MAX;
+double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
+ DCHECK(pref_service_);
+ if (!HasLastTime(metric))
+ return 0;
base::TimeDelta since_last_time =
base::Time::Now() - base::Time::FromInternalValue(
- pref_service_->GetInt64(last_time_pref_name));
+ pref_service_->GetInt64(kLastTimeKeys[metric]));
return since_last_time.InSecondsF() / 3600;
}
-void UserClassifier::SetLastTimeToNow(const char* last_time_pref_name) {
- pref_service_->SetInt64(last_time_pref_name,
+bool UserClassifier::HasLastTime(const Metric metric) const {
+ DCHECK(pref_service_);
Marc Treib 2016/09/20 10:27:03 These DCHECKs aren't really helpful: If it were nu
jkrcal 2016/09/20 13:10:13 Done.
+ return pref_service_->HasPrefPath(kLastTimeKeys[metric]);
+}
+
+void UserClassifier::SetLastTimeToNow(Metric metric) {
+ DCHECK(pref_service_);
+ pref_service_->SetInt64(kLastTimeKeys[metric],
base::Time::Now().ToInternalValue());
}
+double UserClassifier::GetMetricValue(const Metric metric) const {
+ DCHECK(pref_service_);
+ return pref_service_->GetDouble(kMetricKeys[metric]);
+}
+
+void UserClassifier::SetMetricValue(const Metric metric, double metric_value) {
+ DCHECK(pref_service_);
+ pref_service_->SetDouble(kMetricKeys[metric], metric_value);
+}
+
+void UserClassifier::ClearMetricValue(const Metric metric) {
+ DCHECK(pref_service_);
+ pref_service_->ClearPref(kMetricKeys[metric]);
+}
+
} // namespace ntp_snippets
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698