Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(793)

Unified Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Marc's comments Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/ntp_snippets/user_classifier.cc
diff --git a/components/ntp_snippets/user_classifier.cc b/components/ntp_snippets/user_classifier.cc
index ef179b320a6bcba3e06b1a5530f06a9810dd557e..9e74f3a2e806376ca704f9b42c3282b0c259402c 100644
--- a/components/ntp_snippets/user_classifier.cc
+++ b/components/ntp_snippets/user_classifier.cc
@@ -33,6 +33,15 @@ const double kMaxHours = 7 * 24;
// do not count again).
const double kMinHours = 0.5;
+// Classification constants: thresholds in hours, read by UserClassifier::GetUserClass().
+const double kFrequentUserScrollsAtLeastOncePerHours = 24;  // Estimate at or below this => FREQUENT_NTP_USER.
+const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;  // NTP-open estimate at or above this => OCCASIONAL_NTP_USER.
+
+// Default lengths of the intervals (in hours) for new users; they seed the default values of the metric prefs.
+const double kNTPFrequencyOfANewUserInHours = 24;  // Default for the "NTP opened" metric.
+const double kShowFrequencyOfANewUserInHours = 36;  // Default for the "suggestions shown" metric.
+const double kUseFrequencyOfANewUserInHours = 48;  // Default for the "suggestions used" metric.
+
const char kHistogramAverageHoursToOpenNTP[] =
"NewTabPage.UserClassifier.AverageHoursToOpenNTP";
const char kHistogramAverageHoursToShowSuggestions[] =
@@ -40,26 +49,90 @@ const char kHistogramAverageHoursToShowSuggestions[] =
const char kHistogramAverageHoursToUseSuggestions[] =
"NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
+// Returns the per-hour discount rate |r| that satisfies
+//   kDiscountFactorPerDay = 1 - e^{-r * 24}.
+// The value is computed on the first call and reused afterwards.
+double GetDiscountRatePerHour() {
+  static const double discount_rate_per_hour =
+      std::log(1.0 / (1.0 - kDiscountFactorPerDay)) / 24.0;
+  return discount_rate_per_hour;
+}
+
+// Discounts |old_value| for the |hours_since_last_time| hours that passed
+// since the metric was last recomputed and returns the result. When
+// |event_now| is true, an event is counted as happening right now (adding 1);
+// otherwise no event is assumed within the last |hours_since_last_time| hours.
+double RecomputeMetric(double old_value,
+                       double hours_since_last_time,
+                       bool event_now) {
+  // new_value = [event_now] + e^{-discount_rate_per_hour * hours} * old_value.
+  const double discount =
+      std::exp(-GetDiscountRatePerHour() * hours_since_last_time);
+  const double event_contribution = event_now ? 1 : 0;
+  return event_contribution + discount * old_value;
+}
+
+// Estimates the number of hours between two consecutive events for the given
+// |metric_value|, assuming the events are equally distributed. The estimate
+// is kept within [kMinHours, kMaxHours].
+double GetEstimateHoursBetweenEvents(const double metric_value) {
+  // Right after the first update the metric equals 1; at or below that value
+  // the maximum is reported.
+  if (metric_value <= 1)
+    return kMaxHours;
+
+  // Assume the last event happened right now and the system is in the
+  // steady state, i.e.
+  //   metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value.
+  // Solving for estimate_hours:
+  //   e^{-discount_rate * estimate_hours} = (metric_value - 1) / metric_value,
+  //   estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
+  const double ratio = metric_value / (metric_value - 1);
+  const double unclamped_hours = std::log(ratio) / GetDiscountRatePerHour();
+  const double capped_hours = std::min(kMaxHours, unclamped_hours);
+  return std::max(kMinHours, capped_hours);
+}
+
+// Inverse of GetEstimateHoursBetweenEvents(): returns the metric value for
+// which that function yields |estimate_hours|. The input is first clamped to
+// [kMinHours, kMaxHours].
+double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) {
+  const double clamped_hours =
+      std::max(kMinHours, std::min(kMaxHours, estimate_hours));
+
+  // In the steady state
+  //   metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
+  // hence
+  //   metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
+  const double decay = std::exp(-GetDiscountRatePerHour() * clamped_hours);
+  return 1.0 / (1.0 - decay);
+}
+
} // namespace
namespace ntp_snippets {
// Stores |pref_service| for later pref reads and writes. The new side of this
// hunk drops the discount_rate_per_hour_ member initializer.
UserClassifier::UserClassifier(PrefService* pref_service)
- : pref_service_(pref_service),
- // Compute discount_rate_per_hour such that
- // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
- discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {}
+ : pref_service_(pref_service) {}
// Empty destructor: no explicit cleanup is performed here.
UserClassifier::~UserClassifier() {}
// static
void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
+ registry->RegisterDoublePref(prefs::kUserClassifierAverageNTPOpenedPerHour,
+ GetMetricValueForEstimateHoursBetweenEvents(
+ kNTPFrequencyOfANewUserInHours));
registry->RegisterDoublePref(
- prefs::kUserClassifierAverageNTPOpenedPerHour, 1);
+ prefs::kUserClassifierAverageSuggestionsShownPerHour,
+ GetMetricValueForEstimateHoursBetweenEvents(
+ kShowFrequencyOfANewUserInHours));
registry->RegisterDoublePref(
- prefs::kUserClassifierAverageSuggestionsShownPerHour, 1);
- registry->RegisterDoublePref(
- prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1);
+ prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+ GetMetricValueForEstimateHoursBetweenEvents(
+ kUseFrequencyOfANewUserInHours));
registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0);
registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions,
@@ -69,78 +142,136 @@ void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
}
// Handles an "NTP opened" event: updates the corresponding metric and records
// the resulting estimate of hours between events in the
// kHistogramAverageHoursToOpenNTP histogram.
void UserClassifier::OnNTPOpened() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
- prefs::kUserClassifierLastTimeToOpenNTP);
+ double metric =
+ UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
+ prefs::kUserClassifierLastTimeToOpenNTP);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageNTPOpenedPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
kMaxHours, 50);
}
// Handles a "suggestions shown" event: updates the corresponding metric and
// records the resulting estimate of hours between events in the
// kHistogramAverageHoursToShowSuggestions histogram.
void UserClassifier::OnSuggestionsShown() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
- prefs::kUserClassifierLastTimeToShowSuggestions);
+ double metric =
+ UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
+ prefs::kUserClassifierLastTimeToShowSuggestions);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageSuggestionsShownPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
kMaxHours, 50);
}
// Handles a "suggestions used" event: updates the corresponding metric and
// records the resulting estimate of hours between events in the
// kHistogramAverageHoursToUseSuggestions histogram.
void UserClassifier::OnSuggestionsUsed() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
- prefs::kUserClassifierLastTimeToUseSuggestions);
+ double metric =
+ UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+ prefs::kUserClassifierLastTimeToUseSuggestions);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageSuggestionsUsedPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
kMaxHours, 50);
}
-void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
- const char* last_time_pref_name) {
+// Returns the current estimate, in hours, of the average time between two
+// "NTP opened" events.
+double UserClassifier::GetEstimatedAvgTimeToOpenNTP() const {
+  return GetEstimateHoursBetweenEvents(
+      GetUpToDateMetricValue(prefs::kUserClassifierAverageNTPOpenedPerHour,
+                             prefs::kUserClassifierLastTimeToOpenNTP));
+}
+
+// Returns the current estimate, in hours, of the average time between two
+// "suggestions shown" events.
+double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() const {
+  return GetEstimateHoursBetweenEvents(GetUpToDateMetricValue(
+      prefs::kUserClassifierAverageSuggestionsShownPerHour,
+      prefs::kUserClassifierLastTimeToShowSuggestions));
+}
+
+// Returns the current estimate, in hours, of the average time between two
+// "suggestions used" events.
+double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() const {
+  return GetEstimateHoursBetweenEvents(GetUpToDateMetricValue(
+      prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+      prefs::kUserClassifierLastTimeToUseSuggestions));
+}
+
+// Classifies the user from the current estimates:
+//  - OCCASIONAL_NTP_USER if the estimated time between NTP opens is at least
+//    kOccasionalUserOpensNTPAtMostOncePerHours hours;
+//  - otherwise FREQUENT_NTP_USER if the estimated time between "suggestions
+//    shown" events is at most kFrequentUserScrollsAtLeastOncePerHours hours;
+//  - otherwise NORMAL_NTP_USER.
+UserClassifier::UserClass UserClassifier::GetUserClass() const {
+  if (GetEstimatedAvgTimeToOpenNTP() >=
+      kOccasionalUserOpensNTPAtMostOncePerHours) {
+    return UserClass::OCCASIONAL_NTP_USER;
+  }
+
+  // The threshold is defined in terms of scrolling, so it is compared with
+  // the "suggestions shown" estimate rather than the "suggestions used" one.
+  // NOTE(review): assumes suggestions are shown when the user scrolls to
+  // them -- confirm against the callers of OnSuggestionsShown().
+  if (GetEstimatedAvgTimeToShowSuggestions() <=
+      kFrequentUserScrollsAtLeastOncePerHours) {
+    return UserClass::FREQUENT_NTP_USER;
+  }
+
+  return UserClass::NORMAL_NTP_USER;
+}
+
+// Returns a human-readable description of the class reported by
+// GetUserClass().
+std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
+  const UserClass user_class = GetUserClass();
+  switch (user_class) {
+    case UserClass::NORMAL_NTP_USER:
+      return "Normal user of the NTP";
+    case UserClass::FREQUENT_NTP_USER:
+      return "Frequent user of the NTP";
+    case UserClass::OCCASIONAL_NTP_USER:
+      return "Occasional user of the NTP";
+  }
+  // Every enumerator returns above, so control should never get here.
+  NOTREACHED();
+  return "Unknown user class";
+}
+
+// Clears the three metric prefs followed by the three "last time" prefs.
+void UserClassifier::ClearClassificationForDebugging() {
+  const char* const kPrefsToClear[] = {
+      prefs::kUserClassifierAverageNTPOpenedPerHour,
+      prefs::kUserClassifierAverageSuggestionsShownPerHour,
+      prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+      prefs::kUserClassifierLastTimeToOpenNTP,
+      prefs::kUserClassifierLastTimeToShowSuggestions,
+      prefs::kUserClassifierLastTimeToUseSuggestions,
+  };
+  for (const char* pref_name : kPrefsToClear)
+    pref_service_->ClearPref(pref_name);
+}
+
// Registers an event for the metric stored in |metric_pref_name|, using
// |last_time_pref_name| to track when the previous event was counted.
// Returns the newly stored metric value, or the result of
// GetUpToDateMetricValue() when the event is ignored, or 0 when there is no
// pref service.
+double UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
+ const char* last_time_pref_name) {
+ // The pref_service_ can be null in tests; 0 is returned in that case.
if (!pref_service_)
- return;
+ return 0;
double hours_since_last_time =
std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
+ // If the "last time" is not defined (hours_since_last_time is zero), set it.
+ if (!hours_since_last_time)
Marc Treib 2016/09/20 10:27:02 This will check for zero - is that what you want?
jkrcal 2016/09/20 13:10:13 I agree, this a bit obscure, to say the least :) N
+ SetLastTimeToNow(last_time_pref_name);
// Ignore events within the same "browsing session": the stored metric is left untouched.
if (hours_since_last_time < kMinHours)
- return;
+ return GetUpToDateMetricValue(metric_pref_name, last_time_pref_name);
+
SetLastTimeToNow(last_time_pref_name);
double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
- // Compute and store the new discounted average according to the formula
- // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
double new_avg_events_per_hour =
- 1 +
- std::exp(discount_rate_per_hour_ * hours_since_last_time) *
- avg_events_per_hour;
+ RecomputeMetric(avg_events_per_hour, hours_since_last_time, true);
pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour);
+ return new_avg_events_per_hour;
}
-double UserClassifier::GetEstimateHoursBetweenEvents(
- const char* metric_pref_name) {
- double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
// Returns the metric stored in |metric_pref_name| discounted by the time that
// has passed (capped at kMaxHours) according to |last_time_pref_name|.
// Nothing is written back to the prefs. Returns 0 when there is no pref
// service.
+double UserClassifier::GetUpToDateMetricValue(
+ const char* metric_pref_name,
+ const char* last_time_pref_name) const {
+ // The pref_service_ can be null in tests.
+ if (!pref_service_)
+ return 0;
- // Right after the first update, the metric is equal to 1.
- if (avg_events_per_hour <= 1)
- return kMaxHours;
+ double hours_since_last_time =
+ std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
- // This is the estimate with the assumption that last event happened right
- // now and the system is in the steady-state. Solve estimate_hours in the
- // steady-state equation:
- // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events.
- return std::min(kMaxHours,
- std::log(avg_events_per_hour / (avg_events_per_hour - 1)) /
- discount_rate_per_hour_);
+ double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
+ // Only discount the stored value: reading the metric is not an event, so
+ // |event_now| must be false (true would add 1 on every read).
+ return RecomputeMetric(avg_events_per_hour, hours_since_last_time, false);
}
double UserClassifier::GetHoursSinceLastTime(
- const char* last_time_pref_name) {
+ const char* last_time_pref_name) const {
if (!pref_service_->HasPrefPath(last_time_pref_name))
- return DBL_MAX;
+ return 0;
base::TimeDelta since_last_time =
base::Time::Now() - base::Time::FromInternalValue(
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698