Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(954)

Unified Diff: components/ntp_snippets/user_classifier.cc

Issue 2346263002: Extending the UserClassifier to actually support classification. (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/ntp_snippets/user_classifier.cc
diff --git a/components/ntp_snippets/user_classifier.cc b/components/ntp_snippets/user_classifier.cc
index ef179b320a6bcba3e06b1a5530f06a9810dd557e..72d06095ca753fb1e7772bc2f420b6d1b1fbbc15 100644
--- a/components/ntp_snippets/user_classifier.cc
+++ b/components/ntp_snippets/user_classifier.cc
@@ -33,6 +33,15 @@ const double kMaxHours = 7 * 24;
// do not count again).
const double kMinHours = 0.5;
+// Classification constants.
+const double kFrequentUserScrollsAtLeastOncePerHours = 24;
+const double kOccasionalUserOpensNTPAtMostOncePerHours = 72;
+
+// Default frequency values for new users.
Marc Treib 2016/09/19 15:20:16 Again, intervals, not frequencies
jkrcal 2016/09/19 18:45:25 Done.
+const double kNTPFrequencyOfANewUserInHours = 24;
+const double kShowFrequencyOfANewUserInHours = 36;
+const double kUseFrequencyOfANewUserInHours = 48;
+
const char kHistogramAverageHoursToOpenNTP[] =
"NewTabPage.UserClassifier.AverageHoursToOpenNTP";
const char kHistogramAverageHoursToShowSuggestions[] =
@@ -40,26 +49,89 @@ const char kHistogramAverageHoursToShowSuggestions[] =
const char kHistogramAverageHoursToUseSuggestions[] =
"NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
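+// Returns the discount rate per hour derived from kDiscountFactorPerDay. The
+// value is computed on the first call and cached for subsequent calls.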
+double GetDiscountRatePerHour() {
+ static double discount_rate_per_hour = 0;
+
+ if (discount_rate_per_hour == 0) {
+ // Compute discount_rate_per_hour such that
+ // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
+ discount_rate_per_hour = (std::log(1 / (1 - kDiscountFactorPerDay)) / 24);
Marc Treib 2016/09/19 15:20:16 nit: remove the extra set of parens. nit2: Can you
jkrcal 2016/09/19 18:45:25 Done the nits. In a next CL, I want to override i
Marc Treib 2016/09/20 10:27:02 And you want the variation param to specify the pe
jkrcal 2016/09/20 13:10:13 I think the per-day value is better because it is
+ }
+
+ return discount_rate_per_hour;
+}
+
+// Returns the new value of the metric using its |old_value|, assuming
Marc Treib 2016/09/19 15:20:15 What's the unit of the metric? What does the retur
jkrcal 2016/09/19 18:45:25 Hmm :) Nothing intuitive. I have added a comment i
Marc Treib 2016/09/20 10:27:02 Yup, that helps, thanks!
+// |hours_since_last_time| hours have passed since it was last recomputed.
+// If |event_now| is true, the event is assumed to have happened right now,
+// otherwise no event is assumed to happen within the last
+// |hours_since_last_time| hours.
+double RecomputeMetric(double old_value,
+ double hours_since_last_time,
+ bool event_now) {
+ // Compute the new discounted average according to the formula
+ // new_value := event + e^{-discount_rate_per_hour * hours_since} * old_value,
+ // where event is 1 if |event_now| is true and 0 otherwise.
+ return (event_now ? 1 : 0) +
+ std::exp(-GetDiscountRatePerHour() * hours_since_last_time) *
+ old_value;
+}
+
+// Compute the number of hours between two events for the given metric value
+// assuming the events were equally distributed.
+double GetEstimateHoursBetweenEvents(const double metric_value) {
+ // Right after the first update, the metric is equal to 1. For values <= 1 the
+ // logarithm below is undefined, so report the maximum interval instead.
+ if (metric_value <= 1)
+ return kMaxHours;
+
+ // This is the estimate with the assumption that last event happened right
+ // now and the system is in the steady-state. Solve estimate_hours in the
+ // steady-state equation:
+ // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
+ // i.e.
+ // -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
+ // discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
+ // estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
+ double estimate_hours =
+ std::log(metric_value / (metric_value - 1)) / GetDiscountRatePerHour();
+ return std::max(kMinHours, std::min(kMaxHours, estimate_hours));
+}
+
+// The inverse of GetEstimateHoursBetweenEvents().
+double GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours) {
+ // Keep the input value within [kMinHours, kMaxHours].
+ estimate_hours = std::max(kMinHours, std::min(kMaxHours, estimate_hours));
+
+ // Return |metric_value| such that GetEstimateHoursBetweenEvents for
+ // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
+ // metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
+ // i.e.
+ // metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
+ return 1.0 / (1.0 - std::exp(-GetDiscountRatePerHour() * estimate_hours));
+}
+
} // namespace
namespace ntp_snippets {
UserClassifier::UserClassifier(PrefService* pref_service)
- : pref_service_(pref_service),
- // Compute discount_rate_per_hour such that
- // kDiscountFactorPerDay = 1 - e^{-discount_rate_per_hour * 24}.
- discount_rate_per_hour_(std::log(1 / (1 - kDiscountFactorPerDay)) / 24) {}
+ : pref_service_(pref_service) {}
UserClassifier::~UserClassifier() {}
// static
void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
+ registry->RegisterDoublePref(prefs::kUserClassifierAverageNTPOpenedPerHour,
+ GetMetricValueForEstimateHoursBetweenEvents(
Marc Treib 2016/09/19 15:20:16 Hm, you're changing the defaults of existing prefs
jkrcal 2016/09/19 18:45:25 I think the default values are not stored anywhere
Marc Treib 2016/09/20 10:27:02 Yes, I mostly meant that users who used M54 briefl
jkrcal 2016/09/20 13:10:13 Not a big deal, IMO. The initial value has after a
+ kNTPFrequencyOfANewUserInHours));
registry->RegisterDoublePref(
- prefs::kUserClassifierAverageNTPOpenedPerHour, 1);
+ prefs::kUserClassifierAverageSuggestionsShownPerHour,
+ GetMetricValueForEstimateHoursBetweenEvents(
+ kShowFrequencyOfANewUserInHours));
registry->RegisterDoublePref(
- prefs::kUserClassifierAverageSuggestionsShownPerHour, 1);
- registry->RegisterDoublePref(
- prefs::kUserClassifierAverageSuggestionsUsedPerHour, 1);
+ prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+ GetMetricValueForEstimateHoursBetweenEvents(
+ kUseFrequencyOfANewUserInHours));
registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToOpenNTP, 0);
registry->RegisterInt64Pref(prefs::kUserClassifierLastTimeToShowSuggestions,
@@ -69,78 +141,131 @@ void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
}
void UserClassifier::OnNTPOpened() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
- prefs::kUserClassifierLastTimeToOpenNTP);
+ double metric =
+ UpdateMetricOnEvent(prefs::kUserClassifierAverageNTPOpenedPerHour,
+ prefs::kUserClassifierLastTimeToOpenNTP);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageNTPOpenedPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
kMaxHours, 50);
}
void UserClassifier::OnSuggestionsShown() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
- prefs::kUserClassifierLastTimeToShowSuggestions);
+ double metric =
+ UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsShownPerHour,
+ prefs::kUserClassifierLastTimeToShowSuggestions);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageSuggestionsShownPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg, 1,
kMaxHours, 50);
}
void UserClassifier::OnSuggestionsUsed() {
- UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
- prefs::kUserClassifierLastTimeToUseSuggestions);
+ double metric =
+ UpdateMetricOnEvent(prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+ prefs::kUserClassifierLastTimeToUseSuggestions);
- double avg = GetEstimateHoursBetweenEvents(
- prefs::kUserClassifierAverageSuggestionsUsedPerHour);
+ double avg = GetEstimateHoursBetweenEvents(metric);
UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg, 1,
kMaxHours, 50);
}
-void UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
- const char* last_time_pref_name) {
+double UserClassifier::GetEstimatedAvgTimeToOpenNTP() {
+ double metric =
+ GetUpToDateMetricValue(prefs::kUserClassifierAverageNTPOpenedPerHour,
+ prefs::kUserClassifierLastTimeToOpenNTP);
+ return GetEstimateHoursBetweenEvents(metric);
+}
+
+double UserClassifier::GetEstimatedAvgTimeToShowSuggestions() {
+ double metric = GetUpToDateMetricValue(
+ prefs::kUserClassifierAverageSuggestionsShownPerHour,
+ prefs::kUserClassifierLastTimeToShowSuggestions);
+
Marc Treib 2016/09/19 15:20:16 nit: extra empty line (the other similar methods d
jkrcal 2016/09/19 18:45:25 Done.
+ return GetEstimateHoursBetweenEvents(metric);
+}
+
+double UserClassifier::GetEstimatedAvgTimeToUseSuggestions() {
+ double metric = GetUpToDateMetricValue(
+ prefs::kUserClassifierAverageSuggestionsUsedPerHour,
+ prefs::kUserClassifierLastTimeToUseSuggestions);
+ return GetEstimateHoursBetweenEvents(metric);
+}
+
+UserClassifier::UserClass UserClassifier::GetUserClass() {
+ if (GetEstimatedAvgTimeToOpenNTP() >=
+ kOccasionalUserOpensNTPAtMostOncePerHours)
+ return UserClass::OCCASIONAL_NTP_USER;
Marc Treib 2016/09/19 15:20:16 nit: Braces please
jkrcal 2016/09/19 18:45:25 Done.
+
+ if (GetEstimatedAvgTimeToUseSuggestions() <=
+ kFrequentUserScrollsAtLeastOncePerHours)
+ return UserClass::FREQUENT_NTP_USER;
Marc Treib 2016/09/19 15:20:16 Also here
jkrcal 2016/09/19 18:45:25 Done.
+
+ return UserClass::NORMAL_NTP_USER;
+}
+
+std::string UserClassifier::GetUserClassDescription() {
+ switch (GetUserClass()) {
+ case UserClass::OCCASIONAL_NTP_USER:
+ return "Occasional user of the NTP";
+ case UserClass::NORMAL_NTP_USER:
+ return "Normal user of the NTP";
+ case UserClass::FREQUENT_NTP_USER:
+ return "Frequent user of content suggestions";
Marc Treib 2016/09/19 15:20:16 This string is inconsistent with the others.
jkrcal 2016/09/19 18:45:25 Done.
+ }
+ NOTREACHED();
+ return "Unknown user class";
+}
+
+void UserClassifier::ClearClassificationForTesting() {
+ pref_service_->ClearPref(prefs::kUserClassifierAverageNTPOpenedPerHour);
+ pref_service_->ClearPref(
+ prefs::kUserClassifierAverageSuggestionsShownPerHour);
+ pref_service_->ClearPref(prefs::kUserClassifierAverageSuggestionsUsedPerHour);
+
+ pref_service_->ClearPref(prefs::kUserClassifierLastTimeToOpenNTP);
+ pref_service_->ClearPref(prefs::kUserClassifierLastTimeToShowSuggestions);
+ pref_service_->ClearPref(prefs::kUserClassifierLastTimeToUseSuggestions);
+}
+
+double UserClassifier::UpdateMetricOnEvent(const char* metric_pref_name,
+ const char* last_time_pref_name) {
if (!pref_service_)
Marc Treib 2016/09/19 15:20:16 Pre-existing, but: Can this ever happen? If so, pl
jkrcal 2016/09/19 18:45:25 Done.
- return;
+ return 0;
double hours_since_last_time =
std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
// Ignore events within the same "browsing session".
if (hours_since_last_time < kMinHours)
- return;
+ return GetUpToDateMetricValue(metric_pref_name, last_time_pref_name);
+
SetLastTimeToNow(last_time_pref_name);
double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
- // Compute and store the new discounted average according to the formula
- // avg_events := 1 + e^{-discount_rate_per_hour * hours_since} * avg_events.
double new_avg_events_per_hour =
- 1 +
- std::exp(discount_rate_per_hour_ * hours_since_last_time) *
- avg_events_per_hour;
+ RecomputeMetric(avg_events_per_hour, hours_since_last_time, true);
pref_service_->SetDouble(metric_pref_name, new_avg_events_per_hour);
+ return new_avg_events_per_hour;
}
-double UserClassifier::GetEstimateHoursBetweenEvents(
- const char* metric_pref_name) {
- double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
+// Returns the metric stored in |metric_pref_name|, discounted by the number of
+// hours (capped at kMaxHours) since the time stored in |last_time_pref_name|.
+// No new event is counted and the metric pref is not written. Returns 0 if
+// there is no pref service.
+double UserClassifier::GetUpToDateMetricValue(const char* metric_pref_name,
+ const char* last_time_pref_name) {
+ if (!pref_service_)
+ return 0;
- // Right after the first update, the metric is equal to 1.
- if (avg_events_per_hour <= 1)
- return kMaxHours;
+ double hours_since_last_time =
+ std::min(kMaxHours, GetHoursSinceLastTime(last_time_pref_name));
- // This is the estimate with the assumption that last event happened right
- // now and the system is in the steady-state. Solve estimate_hours in the
- // steady-state equation:
- // avg_events = 1 + e^{-discount_rate * estimate_hours} * avg_events.
- return std::min(kMaxHours,
- std::log(avg_events_per_hour / (avg_events_per_hour - 1)) /
- discount_rate_per_hour_);
+ double avg_events_per_hour = pref_service_->GetDouble(metric_pref_name);
+ // Merely reading the metric is not an event, so only apply the discount.
+ // Passing true here would add 1 to the returned value on every read.
+ return RecomputeMetric(avg_events_per_hour, hours_since_last_time, false);
}
double UserClassifier::GetHoursSinceLastTime(
const char* last_time_pref_name) {
- if (!pref_service_->HasPrefPath(last_time_pref_name))
- return DBL_MAX;
+ if (!pref_service_->HasPrefPath(last_time_pref_name)) {
+ SetLastTimeToNow(last_time_pref_name);
Marc Treib 2016/09/20 10:27:02 Was the reason for moving this out only so you can
jkrcal 2016/09/20 13:10:13 Mostly. I also think that it is clearer if a "Get"
+ return 0;
+ }
base::TimeDelta since_last_time =
base::Time::Now() - base::Time::FromInternalValue(
« components/ntp_snippets/user_classifier.h ('K') | « components/ntp_snippets/user_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698