Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(277)

Side by Side Diff: chrome/browser/android/preferences/important_sites_util.cc

Issue 2393103002: [Durable] Updated Durable heuristic to use 'important sites' (Closed)
Patch Set: windows test fix Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/android/preferences/important_sites_util.h"
6
7 #include <algorithm>
8 #include <map>
9 #include <memory>
10 #include <set>
11 #include <utility>
12
13 #include "base/containers/hash_tables.h"
14 #include "base/memory/ptr_util.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/stl_util.h"
17 #include "base/time/time.h"
18 #include "base/values.h"
19 #include "chrome/browser/banners/app_banner_settings_helper.h"
20 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
21 #include "chrome/browser/content_settings/host_content_settings_map_factory.h"
22 #include "chrome/browser/engagement/site_engagement_score.h"
23 #include "chrome/browser/engagement/site_engagement_service.h"
24 #include "chrome/browser/profiles/profile.h"
25 #include "components/bookmarks/browser/bookmark_model.h"
26 #include "components/content_settings/core/browser/host_content_settings_map.h"
27 #include "components/content_settings/core/common/content_settings.h"
28 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
29 #include "url/gurl.h"
30
31 namespace {
32 using bookmarks::BookmarkModel;
33 using ImportantDomainInfo = ImportantSitesUtil::ImportantDomainInfo;
34
// Dictionary key under which the number of times a site was ignored is stored
// in its CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO website setting.
constexpr char kNumTimesIgnoredName[] = "NumTimesIgnored";
// A site whose stored ignore count reaches this value is treated as
// blacklisted.
constexpr int kTimesIgnoredForBlacklist = 3;

// Upper bound on the number of bookmarks used as importance signals. When the
// user has at most kMaxBookmarks bookmarks they are all used; otherwise the
// bookmarks are filtered by site engagement, sorted by engagement score, and
// trimmed to kMaxBookmarks.
constexpr int kMaxBookmarks = 5;
42
43 // Do not change the values here, as they are used for UMA histograms.
// Each enumerator is also used as a bit index: a reason is recorded for a
// domain by setting bit (1 << reason) in ImportantDomainInfo::reason_bitfield.
// REASON_BOUNDARY is not a reason itself; it bounds the loops that walk the
// reason bits and is passed as the boundary value to the histogram macros.
44 enum ImportantReason {
45 ENGAGEMENT = 0,
46 DURABLE = 1,
47 BOOKMARKS = 2,
48 HOME_SCREEN = 3,
49 NOTIFICATIONS = 4,
50 REASON_BOUNDARY
51 };
52
53 // We need this to be a macro, as the histogram macros cache their pointers
54 // after the first call, so when we change the uma name we check fail if we're
55 // just a method.
//
// For every reason bit set in |reason_bitfield| (bit indices 0 up to, but not
// including, ImportantReason::REASON_BOUNDARY) this records the bit index to
// the |uma_name| histogram, and afterwards records the number of set bits to
// the |uma_count_name| histogram. |reason_bitfield| is evaluated exactly once.
// NOTE(review): the recorded count can equal REASON_BOUNDARY when every reason
// bit is set, which is not strictly below the boundary passed for
// |uma_count_name| - confirm the histogram macro treats that as intended.
56 #define RECORD_UMA_FOR_IMPORTANT_REASON(uma_name, uma_count_name, \
57 reason_bitfield) \
58 do { \
59 int count = 0; \
60 int32_t bitfield = (reason_bitfield); \
61 for (int i = 0; i < ImportantReason::REASON_BOUNDARY; i++) { \
62 if ((bitfield >> i) & 1) { \
63 count++; \
64 UMA_HISTOGRAM_ENUMERATION((uma_name), i, \
65 ImportantReason::REASON_BOUNDARY); \
66 } \
67 } \
68 UMA_HISTOGRAM_ENUMERATION((uma_count_name), count, \
69 ImportantReason::REASON_BOUNDARY); \
70 } while (0)
71
72 // Do not change the values here, as they are used for UMA histograms and
73 // testing in important_sites_util_unittest.
// Each value names one combination of the durable, notifications and
// engagement reasons; GetCrossedReasonFromBitfield() maps a reason bitfield to
// one of these values. CROSSED_REASON_UNKNOWN is returned when none of those
// three reasons is set, and CROSSED_REASON_BOUNDARY is only used as the
// histogram boundary.
74 enum CrossedReason {
75 CROSSED_DURABLE = 0,
76 CROSSED_NOTIFICATIONS = 1,
77 CROSSED_ENGAGEMENT = 2,
78 CROSSED_NOTIFICATIONS_AND_ENGAGEMENT = 3,
79 CROSSED_DURABLE_AND_ENGAGEMENT = 4,
80 CROSSED_NOTIFICATIONS_AND_DURABLE = 5,
81 CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT = 6,
82 CROSSED_REASON_UNKNOWN = 7,
83 CROSSED_REASON_BOUNDARY
84 };
85
86 CrossedReason GetCrossedReasonFromBitfield(int32_t reason_bitfield) {
87 bool durable = reason_bitfield & (1 << ImportantReason::DURABLE);
88 bool notifications = reason_bitfield & (1 << ImportantReason::NOTIFICATIONS);
89 bool engagement = reason_bitfield & (1 << ImportantReason::ENGAGEMENT);
90 if (durable && notifications && engagement)
91 return CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT;
92 else if (notifications && durable)
93 return CROSSED_NOTIFICATIONS_AND_DURABLE;
94 else if (notifications && engagement)
95 return CROSSED_NOTIFICATIONS_AND_ENGAGEMENT;
96 else if (durable && engagement)
97 return CROSSED_DURABLE_AND_ENGAGEMENT;
98 else if (notifications)
99 return CROSSED_NOTIFICATIONS;
100 else if (durable)
101 return CROSSED_DURABLE;
102 else if (engagement)
103 return CROSSED_ENGAGEMENT;
104 return CROSSED_REASON_UNKNOWN;
105 }
106
107 std::string GetRegisterableDomainOrIP(const GURL& url) {
108 std::string registerable_domain =
109 net::registry_controlled_domains::GetDomainAndRegistry(
110 url, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
111 if (registerable_domain.empty() && url.HostIsIPAddress())
112 registerable_domain = url.host();
113 return registerable_domain;
114 }
115
116 void MaybePopulateImportantInfoForReason(
117 const GURL& origin,
118 std::set<GURL>* visited_origins,
119 ImportantReason reason,
120 base::hash_map<std::string, ImportantDomainInfo>* output) {
121 if (!origin.is_valid() || !visited_origins->insert(origin).second)
122 return;
123 std::string registerable_domain = GetRegisterableDomainOrIP(origin);
124 ImportantDomainInfo& info = (*output)[registerable_domain];
125 info.reason_bitfield |= 1 << reason;
126 if (info.example_origin.is_empty()) {
127 info.registerable_domain = registerable_domain;
128 info.example_origin = origin;
129 }
130 }
131
132 // Returns the score associated with the given reason. The order of
133 // ImportantReason does not need to correspond to the score order. The higher
134 // the score, the more important the reason is.
135 int GetScoreForReason(ImportantReason reason) {
136 switch (reason) {
137 case ImportantReason::ENGAGEMENT:
138 return 1 << 0;
139 case ImportantReason::DURABLE:
140 return 1 << 1;
141 case ImportantReason::BOOKMARKS:
142 return 1 << 2;
143 case ImportantReason::HOME_SCREEN:
144 return 1 << 3;
145 case ImportantReason::NOTIFICATIONS:
146 return 1 << 4;
147 case ImportantReason::REASON_BOUNDARY:
148 return 0;
149 }
150 return 0;
151 }
152
153 int GetScoreForReasonsBitfield(int32_t reason_bitfield) {
154 int score = 0;
155 for (int i = 0; i < ImportantReason::REASON_BOUNDARY; i++) {
156 if ((reason_bitfield >> i) & 1) {
157 score += GetScoreForReason(static_cast<ImportantReason>(i));
158 }
159 }
160 return score;
161 }
162
163 // Returns if |a| has a higher score than |b|, so that when we sort the higher
164 // score is first.
165 bool CompareDescendingImportantInfo(
166 const std::pair<std::string, ImportantDomainInfo>& a,
167 const std::pair<std::string, ImportantDomainInfo>& b) {
168 int score_a = GetScoreForReasonsBitfield(a.second.reason_bitfield);
169 int score_b = GetScoreForReasonsBitfield(b.second.reason_bitfield);
170 int bitfield_diff = score_a - score_b;
171 if (bitfield_diff != 0)
172 return bitfield_diff > 0;
173 return a.second.engagement_score > b.second.engagement_score;
174 }
175
176 base::hash_set<std::string> GetBlacklistedImportantDomains(Profile* profile) {
177 ContentSettingsForOneType content_settings_list;
178 HostContentSettingsMap* map =
179 HostContentSettingsMapFactory::GetForProfile(profile);
180 map->GetSettingsForOneType(CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO,
181 content_settings::ResourceIdentifier(),
182 &content_settings_list);
183 base::hash_set<std::string> ignoring_domains;
184 for (const ContentSettingPatternSource& site : content_settings_list) {
185 GURL origin(site.primary_pattern.ToString());
186 if (!origin.is_valid() ||
187 base::ContainsKey(ignoring_domains, origin.host())) {
188 continue;
189 }
190
191 std::unique_ptr<base::DictionaryValue> dict =
192 base::DictionaryValue::From(map->GetWebsiteSetting(
193 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "",
194 nullptr));
195
196 if (!dict)
197 continue;
198
199 int times_ignored = 0;
200 if (!dict->GetInteger(kNumTimesIgnoredName, &times_ignored) ||
201 times_ignored < kTimesIgnoredForBlacklist) {
202 continue;
203 }
204
205 ignoring_domains.insert(origin.host());
206 }
207 return ignoring_domains;
208 }
209
210 void PopulateInfoMapWithSiteEngagement(
211 Profile* profile,
212 SiteEngagementService::EngagementLevel minimum_engagement,
213 std::map<GURL, double>* engagement_map,
214 base::hash_map<std::string, ImportantDomainInfo>* output) {
215 SiteEngagementService* service = SiteEngagementService::Get(profile);
216 *engagement_map = service->GetScoreMap();
217 // We can have multiple origins for a single domain, so we record the one
218 // with the highest engagement score.
219 for (const auto& url_engagement_pair : *engagement_map) {
220 if (!service->IsEngagementAtLeast(url_engagement_pair.first,
221 minimum_engagement)) {
222 continue;
223 }
224 std::string registerable_domain =
225 GetRegisterableDomainOrIP(url_engagement_pair.first);
226 ImportantDomainInfo& info = (*output)[registerable_domain];
227 if (url_engagement_pair.second > info.engagement_score) {
228 info.registerable_domain = registerable_domain;
229 info.engagement_score = url_engagement_pair.second;
230 info.example_origin = url_engagement_pair.first;
231 info.reason_bitfield |= 1 << ImportantReason::ENGAGEMENT;
232 }
233 }
234 }
235
236 void PopulateInfoMapWithContentTypeAllowed(
237 Profile* profile,
238 ContentSettingsType content_type,
239 ImportantReason reason,
240 base::hash_map<std::string, ImportantDomainInfo>* output) {
241 // Grab our content settings list.
242 ContentSettingsForOneType content_settings_list;
243 HostContentSettingsMapFactory::GetForProfile(profile)->GetSettingsForOneType(
244 content_type, content_settings::ResourceIdentifier(),
245 &content_settings_list);
246 // Extract a set of urls, using the primary pattern. We don't handle
247 // wildcard patterns.
248 std::set<GURL> content_origins;
249 for (const ContentSettingPatternSource& site : content_settings_list) {
250 if (site.setting != CONTENT_SETTING_ALLOW)
251 continue;
252 MaybePopulateImportantInfoForReason(GURL(site.primary_pattern.ToString()),
253 &content_origins, reason, output);
254 }
255 }
256
257 void PopulateInfoMapWithBookmarks(
258 Profile* profile,
259 const std::map<GURL, double>& engagement_map,
260 base::hash_map<std::string, ImportantDomainInfo>* output) {
261 SiteEngagementService* service = SiteEngagementService::Get(profile);
262 BookmarkModel* model =
263 BookmarkModelFactory::GetForBrowserContextIfExists(profile);
264 if (!model)
265 return;
266 std::vector<BookmarkModel::URLAndTitle> untrimmed_bookmarks;
267 model->GetBookmarks(&untrimmed_bookmarks);
268
269 // Process the bookmarks and optionally trim them if we have too many.
270 std::vector<BookmarkModel::URLAndTitle> result_bookmarks;
271 if (untrimmed_bookmarks.size() > kMaxBookmarks) {
272 std::copy_if(untrimmed_bookmarks.begin(), untrimmed_bookmarks.end(),
273 std::back_inserter(result_bookmarks),
274 [service](const BookmarkModel::URLAndTitle& entry) {
275 return service->IsEngagementAtLeast(
276 entry.url.GetOrigin(),
277 SiteEngagementService::ENGAGEMENT_LEVEL_LOW);
278 });
279 std::sort(result_bookmarks.begin(), result_bookmarks.end(),
280 [&engagement_map](const BookmarkModel::URLAndTitle& a,
281 const BookmarkModel::URLAndTitle& b) {
282 double a_score = engagement_map.at(a.url.GetOrigin());
283 double b_score = engagement_map.at(b.url.GetOrigin());
284 return a_score > b_score;
285 });
286 if (result_bookmarks.size() > kMaxBookmarks)
287 result_bookmarks.resize(kMaxBookmarks);
288 } else {
289 result_bookmarks = std::move(untrimmed_bookmarks);
290 }
291
292 std::set<GURL> content_origins;
293 for (const BookmarkModel::URLAndTitle& bookmark : result_bookmarks) {
294 MaybePopulateImportantInfoForReason(bookmark.url, &content_origins,
295 ImportantReason::BOOKMARKS, output);
296 }
297 }
298
299 void PopulateInfoMapWithHomeScreen(
300 Profile* profile,
301 base::hash_map<std::string, ImportantDomainInfo>* output) {
302 ContentSettingsForOneType content_settings_list;
303 HostContentSettingsMapFactory::GetForProfile(profile)->GetSettingsForOneType(
304 CONTENT_SETTINGS_TYPE_APP_BANNER, content_settings::ResourceIdentifier(),
305 &content_settings_list);
306 // Extract a set of urls, using the primary pattern. We don't handle
307 // wildcard patterns.
308 std::set<GURL> content_origins;
309 base::Time now = base::Time::Now();
310 for (const ContentSettingPatternSource& site : content_settings_list) {
311 GURL origin(site.primary_pattern.ToString());
312 if (!AppBannerSettingsHelper::WasLaunchedRecently(profile, origin, now))
313 continue;
314 MaybePopulateImportantInfoForReason(origin, &content_origins,
315 ImportantReason::HOME_SCREEN, output);
316 }
317 }
318
319 } // namespace
320
321 std::vector<ImportantDomainInfo>
322 ImportantSitesUtil::GetImportantRegisterableDomains(Profile* profile,
323 size_t max_results) {
324 base::hash_map<std::string, ImportantDomainInfo> important_info;
325 std::map<GURL, double> engagement_map;
326
327 PopulateInfoMapWithSiteEngagement(
328 profile, SiteEngagementService::ENGAGEMENT_LEVEL_MEDIUM, &engagement_map,
329 &important_info);
330
331 PopulateInfoMapWithContentTypeAllowed(
332 profile, CONTENT_SETTINGS_TYPE_NOTIFICATIONS,
333 ImportantReason::NOTIFICATIONS, &important_info);
334
335 PopulateInfoMapWithContentTypeAllowed(
336 profile, CONTENT_SETTINGS_TYPE_DURABLE_STORAGE, ImportantReason::DURABLE,
337 &important_info);
338
339 PopulateInfoMapWithBookmarks(profile, engagement_map, &important_info);
340
341 PopulateInfoMapWithHomeScreen(profile, &important_info);
342
343 base::hash_set<std::string> blacklisted_domains =
344 GetBlacklistedImportantDomains(profile);
345
346 std::vector<std::pair<std::string, ImportantDomainInfo>> items(
347 important_info.begin(), important_info.end());
348 std::sort(items.begin(), items.end(), &CompareDescendingImportantInfo);
349
350 std::vector<ImportantDomainInfo> final_list;
351 for (std::pair<std::string, ImportantDomainInfo>& domain_info : items) {
352 if (final_list.size() >= max_results)
353 return final_list;
354 if (blacklisted_domains.find(domain_info.first) !=
355 blacklisted_domains.end()) {
356 continue;
357 }
358 final_list.push_back(domain_info.second);
359 RECORD_UMA_FOR_IMPORTANT_REASON(
360 "Storage.ImportantSites.GeneratedReason",
361 "Storage.ImportantSites.GeneratedReasonCount",
362 domain_info.second.reason_bitfield);
363 }
364
365 return final_list;
366 }
367
368 void ImportantSitesUtil::RecordBlacklistedAndIgnoredImportantSites(
369 Profile* profile,
370 const std::vector<std::string>& blacklisted_sites,
371 const std::vector<int32_t>& blacklisted_sites_reason_bitfield,
372 const std::vector<std::string>& ignored_sites,
373 const std::vector<int32_t>& ignored_sites_reason_bitfield) {
374 // First, record the metrics for blacklisted and ignored sites.
375 for (int32_t reason_bitfield : blacklisted_sites_reason_bitfield) {
376 RECORD_UMA_FOR_IMPORTANT_REASON(
377 "Storage.ImportantSites.CBDChosenReason",
378 "Storage.ImportantSites.CBDChosenReasonCount", reason_bitfield);
379 }
380 for (int32_t reason_bitfield : ignored_sites_reason_bitfield) {
381 RECORD_UMA_FOR_IMPORTANT_REASON(
382 "Storage.ImportantSites.CBDIgnoredReason",
383 "Storage.ImportantSites.CBDIgnoredReasonCount", reason_bitfield);
384 }
385
386 // We use the ignored sites to update our important sites blacklist.
387 HostContentSettingsMap* map =
388 HostContentSettingsMapFactory::GetForProfile(profile);
389 for (const std::string& ignored_site : ignored_sites) {
390 GURL origin("http://" + ignored_site);
391 std::unique_ptr<base::Value> value = map->GetWebsiteSetting(
392 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", nullptr);
393
394 std::unique_ptr<base::DictionaryValue> dict =
395 base::DictionaryValue::From(map->GetWebsiteSetting(
396 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "",
397 nullptr));
398
399 int times_ignored = 0;
400 if (dict)
401 dict->GetInteger(kNumTimesIgnoredName, &times_ignored);
402 else
403 dict = base::MakeUnique<base::DictionaryValue>();
404 dict->SetInteger(kNumTimesIgnoredName, ++times_ignored);
405
406 map->SetWebsiteSettingDefaultScope(
407 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "",
408 std::move(dict));
409 }
410
411 // We clear our blacklist for sites that the user chose.
412 for (const std::string& ignored_site : blacklisted_sites) {
413 GURL origin("http://" + ignored_site);
414 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue());
415 dict->SetInteger(kNumTimesIgnoredName, 0);
416 map->SetWebsiteSettingDefaultScope(
417 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "",
418 std::move(dict));
419 }
420
421 // Finally, record our old crossed-stats.
422 // Note: we don't plan on adding new metrics here, this is just for the finch
423 // experiment to give us initial data on what signals actually mattered.
424 for (int32_t reason_bitfield : blacklisted_sites_reason_bitfield) {
425 UMA_HISTOGRAM_ENUMERATION("Storage.BlacklistedImportantSites.Reason",
426 GetCrossedReasonFromBitfield(reason_bitfield),
427 CROSSED_REASON_BOUNDARY);
428 }
429 }
430
431 void ImportantSitesUtil::MarkOriginAsImportantForTesting(Profile* profile,
432 const GURL& origin) {
433 // First get data from site engagement.
434 SiteEngagementService* site_engagement_service =
435 SiteEngagementService::Get(profile);
436 site_engagement_service->ResetScoreForURL(
437 origin, SiteEngagementScore::GetMediumEngagementBoundary());
438 DCHECK(site_engagement_service->IsEngagementAtLeast(
439 origin, SiteEngagementService::ENGAGEMENT_LEVEL_MEDIUM));
440 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698