OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/android/preferences/important_sites_util.h" | |
6 | |
7 #include <algorithm> | |
8 #include <map> | |
9 #include <memory> | |
10 #include <set> | |
11 #include <utility> | |
12 | |
13 #include "base/containers/hash_tables.h" | |
14 #include "base/memory/ptr_util.h" | |
15 #include "base/metrics/histogram_macros.h" | |
16 #include "base/stl_util.h" | |
17 #include "base/time/time.h" | |
18 #include "base/values.h" | |
19 #include "chrome/browser/banners/app_banner_settings_helper.h" | |
20 #include "chrome/browser/bookmarks/bookmark_model_factory.h" | |
21 #include "chrome/browser/content_settings/host_content_settings_map_factory.h" | |
22 #include "chrome/browser/engagement/site_engagement_score.h" | |
23 #include "chrome/browser/engagement/site_engagement_service.h" | |
24 #include "chrome/browser/profiles/profile.h" | |
25 #include "components/bookmarks/browser/bookmark_model.h" | |
26 #include "components/content_settings/core/browser/host_content_settings_map.h" | |
27 #include "components/content_settings/core/common/content_settings.h" | |
28 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
29 #include "url/gurl.h" | |
30 | |
31 namespace { | |
32 using bookmarks::BookmarkModel; | |
33 using ImportantDomainInfo = ImportantSitesUtil::ImportantDomainInfo; | |
34 | |
// Dictionary key under which the per-site "ignored" counter is stored in the
// IMPORTANT_SITE_INFO website setting.
constexpr char kNumTimesIgnoredName[] = "NumTimesIgnored";
// A site whose ignored counter reaches this value is treated as blacklisted.
constexpr int kTimesIgnoredForBlacklist = 3;
37 | |
// Upper bound on the number of bookmarks used as importance signals. When the
// user has at most kMaxBookmarks bookmarks, all of them are used. Otherwise the
// bookmarks are filtered on site engagement, sorted by engagement score, and
// trimmed to kMaxBookmarks.
constexpr int kMaxBookmarks = 5;
42 | |
// Reasons a registerable domain can be considered important. Each value is
// also used as a bit position in ImportantDomainInfo::reason_bitfield
// (i.e. 1 << reason) and as the sample recorded in the reason histograms.
// Do not change the values here, as they are used for UMA histograms.
enum ImportantReason {
  ENGAGEMENT = 0,
  DURABLE = 1,
  BOOKMARKS = 2,
  HOME_SCREEN = 3,
  NOTIFICATIONS = 4,
  // Not a reason: one past the last reason. Used as the loop bound when
  // walking a bitfield and as the histogram boundary.
  REASON_BOUNDARY
};
52 | |
// Records, for every reason bit set in |reason_bitfield|, one sample of that
// reason into the enumeration histogram |uma_name|, and then records the
// number of set reason bits into |uma_count_name|.
//
// This has to be a macro rather than a function: the histogram macros cache
// their histogram pointer after the first call, so a single function body
// invoked with different histogram names would hit a check failure.
//
// NOTE(review): the recorded count can equal REASON_BOUNDARY when every reason
// bit is set, which is the same value passed as the histogram boundary —
// confirm that this is intended.
#define RECORD_UMA_FOR_IMPORTANT_REASON(uma_name, uma_count_name,         \
                                        reason_bitfield)                  \
  do {                                                                    \
    int count = 0;                                                        \
    int32_t bitfield = (reason_bitfield);                                 \
    for (int i = 0; i < ImportantReason::REASON_BOUNDARY; i++) {          \
      if ((bitfield >> i) & 1) {                                          \
        count++;                                                          \
        UMA_HISTOGRAM_ENUMERATION((uma_name), i,                          \
                                  ImportantReason::REASON_BOUNDARY);      \
      }                                                                   \
    }                                                                     \
    UMA_HISTOGRAM_ENUMERATION((uma_count_name), count,                    \
                              ImportantReason::REASON_BOUNDARY);          \
  } while (0)
71 | |
// Combinations of the DURABLE, NOTIFICATIONS and ENGAGEMENT reasons, as
// produced by GetCrossedReasonFromBitfield() and recorded in the
// "Storage.BlacklistedImportantSites.Reason" histogram.
// Do not change the values here, as they are used for UMA histograms and
// testing in important_sites_util_unittest.
enum CrossedReason {
  CROSSED_DURABLE = 0,
  CROSSED_NOTIFICATIONS = 1,
  CROSSED_ENGAGEMENT = 2,
  CROSSED_NOTIFICATIONS_AND_ENGAGEMENT = 3,
  CROSSED_DURABLE_AND_ENGAGEMENT = 4,
  CROSSED_NOTIFICATIONS_AND_DURABLE = 5,
  CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT = 6,
  // None of the three reasons above was set.
  CROSSED_REASON_UNKNOWN = 7,
  // Histogram boundary; not a valid sample.
  CROSSED_REASON_BOUNDARY
};
85 | |
86 CrossedReason GetCrossedReasonFromBitfield(int32_t reason_bitfield) { | |
87 bool durable = reason_bitfield & (1 << ImportantReason::DURABLE); | |
88 bool notifications = reason_bitfield & (1 << ImportantReason::NOTIFICATIONS); | |
89 bool engagement = reason_bitfield & (1 << ImportantReason::ENGAGEMENT); | |
90 if (durable && notifications && engagement) | |
91 return CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT; | |
92 else if (notifications && durable) | |
93 return CROSSED_NOTIFICATIONS_AND_DURABLE; | |
94 else if (notifications && engagement) | |
95 return CROSSED_NOTIFICATIONS_AND_ENGAGEMENT; | |
96 else if (durable && engagement) | |
97 return CROSSED_DURABLE_AND_ENGAGEMENT; | |
98 else if (notifications) | |
99 return CROSSED_NOTIFICATIONS; | |
100 else if (durable) | |
101 return CROSSED_DURABLE; | |
102 else if (engagement) | |
103 return CROSSED_ENGAGEMENT; | |
104 return CROSSED_REASON_UNKNOWN; | |
105 } | |
106 | |
107 std::string GetRegisterableDomainOrIP(const GURL& url) { | |
108 std::string registerable_domain = | |
109 net::registry_controlled_domains::GetDomainAndRegistry( | |
110 url, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); | |
111 if (registerable_domain.empty() && url.HostIsIPAddress()) | |
112 registerable_domain = url.host(); | |
113 return registerable_domain; | |
114 } | |
115 | |
116 void MaybePopulateImportantInfoForReason( | |
117 const GURL& origin, | |
118 std::set<GURL>* visited_origins, | |
119 ImportantReason reason, | |
120 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
121 if (!origin.is_valid() || !visited_origins->insert(origin).second) | |
122 return; | |
123 std::string registerable_domain = GetRegisterableDomainOrIP(origin); | |
124 ImportantDomainInfo& info = (*output)[registerable_domain]; | |
125 info.reason_bitfield |= 1 << reason; | |
126 if (info.example_origin.is_empty()) { | |
127 info.registerable_domain = registerable_domain; | |
128 info.example_origin = origin; | |
129 } | |
130 } | |
131 | |
132 // Returns the score associated with the given reason. The order of | |
133 // ImportantReason does not need to correspond to the score order. The higher | |
134 // the score, the more important the reason is. | |
135 int GetScoreForReason(ImportantReason reason) { | |
136 switch (reason) { | |
137 case ImportantReason::ENGAGEMENT: | |
138 return 1 << 0; | |
139 case ImportantReason::DURABLE: | |
140 return 1 << 1; | |
141 case ImportantReason::BOOKMARKS: | |
142 return 1 << 2; | |
143 case ImportantReason::HOME_SCREEN: | |
144 return 1 << 3; | |
145 case ImportantReason::NOTIFICATIONS: | |
146 return 1 << 4; | |
147 case ImportantReason::REASON_BOUNDARY: | |
148 return 0; | |
149 } | |
150 return 0; | |
151 } | |
152 | |
153 int GetScoreForReasonsBitfield(int32_t reason_bitfield) { | |
154 int score = 0; | |
155 for (int i = 0; i < ImportantReason::REASON_BOUNDARY; i++) { | |
156 if ((reason_bitfield >> i) & 1) { | |
157 score += GetScoreForReason(static_cast<ImportantReason>(i)); | |
158 } | |
159 } | |
160 return score; | |
161 } | |
162 | |
163 // Returns if |a| has a higher score than |b|, so that when we sort the higher | |
164 // score is first. | |
165 bool CompareDescendingImportantInfo( | |
166 const std::pair<std::string, ImportantDomainInfo>& a, | |
167 const std::pair<std::string, ImportantDomainInfo>& b) { | |
168 int score_a = GetScoreForReasonsBitfield(a.second.reason_bitfield); | |
169 int score_b = GetScoreForReasonsBitfield(b.second.reason_bitfield); | |
170 int bitfield_diff = score_a - score_b; | |
171 if (bitfield_diff != 0) | |
172 return bitfield_diff > 0; | |
173 return a.second.engagement_score > b.second.engagement_score; | |
174 } | |
175 | |
176 base::hash_set<std::string> GetBlacklistedImportantDomains(Profile* profile) { | |
177 ContentSettingsForOneType content_settings_list; | |
178 HostContentSettingsMap* map = | |
179 HostContentSettingsMapFactory::GetForProfile(profile); | |
180 map->GetSettingsForOneType(CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, | |
181 content_settings::ResourceIdentifier(), | |
182 &content_settings_list); | |
183 base::hash_set<std::string> ignoring_domains; | |
184 for (const ContentSettingPatternSource& site : content_settings_list) { | |
185 GURL origin(site.primary_pattern.ToString()); | |
186 if (!origin.is_valid() || | |
187 base::ContainsKey(ignoring_domains, origin.host())) { | |
188 continue; | |
189 } | |
190 | |
191 std::unique_ptr<base::DictionaryValue> dict = | |
192 base::DictionaryValue::From(map->GetWebsiteSetting( | |
193 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
194 nullptr)); | |
195 | |
196 if (!dict) | |
197 continue; | |
198 | |
199 int times_ignored = 0; | |
200 if (!dict->GetInteger(kNumTimesIgnoredName, ×_ignored) || | |
201 times_ignored < kTimesIgnoredForBlacklist) { | |
202 continue; | |
203 } | |
204 | |
205 ignoring_domains.insert(origin.host()); | |
206 } | |
207 return ignoring_domains; | |
208 } | |
209 | |
210 void PopulateInfoMapWithSiteEngagement( | |
211 Profile* profile, | |
212 SiteEngagementService::EngagementLevel minimum_engagement, | |
213 std::map<GURL, double>* engagement_map, | |
214 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
215 SiteEngagementService* service = SiteEngagementService::Get(profile); | |
216 *engagement_map = service->GetScoreMap(); | |
217 // We can have multiple origins for a single domain, so we record the one | |
218 // with the highest engagement score. | |
219 for (const auto& url_engagement_pair : *engagement_map) { | |
220 if (!service->IsEngagementAtLeast(url_engagement_pair.first, | |
221 minimum_engagement)) { | |
222 continue; | |
223 } | |
224 std::string registerable_domain = | |
225 GetRegisterableDomainOrIP(url_engagement_pair.first); | |
226 ImportantDomainInfo& info = (*output)[registerable_domain]; | |
227 if (url_engagement_pair.second > info.engagement_score) { | |
228 info.registerable_domain = registerable_domain; | |
229 info.engagement_score = url_engagement_pair.second; | |
230 info.example_origin = url_engagement_pair.first; | |
231 info.reason_bitfield |= 1 << ImportantReason::ENGAGEMENT; | |
232 } | |
233 } | |
234 } | |
235 | |
236 void PopulateInfoMapWithContentTypeAllowed( | |
237 Profile* profile, | |
238 ContentSettingsType content_type, | |
239 ImportantReason reason, | |
240 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
241 // Grab our content settings list. | |
242 ContentSettingsForOneType content_settings_list; | |
243 HostContentSettingsMapFactory::GetForProfile(profile)->GetSettingsForOneType( | |
244 content_type, content_settings::ResourceIdentifier(), | |
245 &content_settings_list); | |
246 // Extract a set of urls, using the primary pattern. We don't handle | |
247 // wildcard patterns. | |
248 std::set<GURL> content_origins; | |
249 for (const ContentSettingPatternSource& site : content_settings_list) { | |
250 if (site.setting != CONTENT_SETTING_ALLOW) | |
251 continue; | |
252 MaybePopulateImportantInfoForReason(GURL(site.primary_pattern.ToString()), | |
253 &content_origins, reason, output); | |
254 } | |
255 } | |
256 | |
257 void PopulateInfoMapWithBookmarks( | |
258 Profile* profile, | |
259 const std::map<GURL, double>& engagement_map, | |
260 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
261 SiteEngagementService* service = SiteEngagementService::Get(profile); | |
262 BookmarkModel* model = | |
263 BookmarkModelFactory::GetForBrowserContextIfExists(profile); | |
264 if (!model) | |
265 return; | |
266 std::vector<BookmarkModel::URLAndTitle> untrimmed_bookmarks; | |
267 model->GetBookmarks(&untrimmed_bookmarks); | |
268 | |
269 // Process the bookmarks and optionally trim them if we have too many. | |
270 std::vector<BookmarkModel::URLAndTitle> result_bookmarks; | |
271 if (untrimmed_bookmarks.size() > kMaxBookmarks) { | |
272 std::copy_if(untrimmed_bookmarks.begin(), untrimmed_bookmarks.end(), | |
273 std::back_inserter(result_bookmarks), | |
274 [service](const BookmarkModel::URLAndTitle& entry) { | |
275 return service->IsEngagementAtLeast( | |
276 entry.url.GetOrigin(), | |
277 SiteEngagementService::ENGAGEMENT_LEVEL_LOW); | |
278 }); | |
279 std::sort(result_bookmarks.begin(), result_bookmarks.end(), | |
280 [&engagement_map](const BookmarkModel::URLAndTitle& a, | |
281 const BookmarkModel::URLAndTitle& b) { | |
282 double a_score = engagement_map.at(a.url.GetOrigin()); | |
283 double b_score = engagement_map.at(b.url.GetOrigin()); | |
284 return a_score > b_score; | |
285 }); | |
286 if (result_bookmarks.size() > kMaxBookmarks) | |
287 result_bookmarks.resize(kMaxBookmarks); | |
288 } else { | |
289 result_bookmarks = std::move(untrimmed_bookmarks); | |
290 } | |
291 | |
292 std::set<GURL> content_origins; | |
293 for (const BookmarkModel::URLAndTitle& bookmark : result_bookmarks) { | |
294 MaybePopulateImportantInfoForReason(bookmark.url, &content_origins, | |
295 ImportantReason::BOOKMARKS, output); | |
296 } | |
297 } | |
298 | |
299 void PopulateInfoMapWithHomeScreen( | |
300 Profile* profile, | |
301 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
302 ContentSettingsForOneType content_settings_list; | |
303 HostContentSettingsMapFactory::GetForProfile(profile)->GetSettingsForOneType( | |
304 CONTENT_SETTINGS_TYPE_APP_BANNER, content_settings::ResourceIdentifier(), | |
305 &content_settings_list); | |
306 // Extract a set of urls, using the primary pattern. We don't handle | |
307 // wildcard patterns. | |
308 std::set<GURL> content_origins; | |
309 base::Time now = base::Time::Now(); | |
310 for (const ContentSettingPatternSource& site : content_settings_list) { | |
311 GURL origin(site.primary_pattern.ToString()); | |
312 if (!AppBannerSettingsHelper::WasLaunchedRecently(profile, origin, now)) | |
313 continue; | |
314 MaybePopulateImportantInfoForReason(origin, &content_origins, | |
315 ImportantReason::HOME_SCREEN, output); | |
316 } | |
317 } | |
318 | |
319 } // namespace | |
320 | |
321 std::vector<ImportantDomainInfo> | |
322 ImportantSitesUtil::GetImportantRegisterableDomains(Profile* profile, | |
323 size_t max_results) { | |
324 base::hash_map<std::string, ImportantDomainInfo> important_info; | |
325 std::map<GURL, double> engagement_map; | |
326 | |
327 PopulateInfoMapWithSiteEngagement( | |
328 profile, SiteEngagementService::ENGAGEMENT_LEVEL_MEDIUM, &engagement_map, | |
329 &important_info); | |
330 | |
331 PopulateInfoMapWithContentTypeAllowed( | |
332 profile, CONTENT_SETTINGS_TYPE_NOTIFICATIONS, | |
333 ImportantReason::NOTIFICATIONS, &important_info); | |
334 | |
335 PopulateInfoMapWithContentTypeAllowed( | |
336 profile, CONTENT_SETTINGS_TYPE_DURABLE_STORAGE, ImportantReason::DURABLE, | |
337 &important_info); | |
338 | |
339 PopulateInfoMapWithBookmarks(profile, engagement_map, &important_info); | |
340 | |
341 PopulateInfoMapWithHomeScreen(profile, &important_info); | |
342 | |
343 base::hash_set<std::string> blacklisted_domains = | |
344 GetBlacklistedImportantDomains(profile); | |
345 | |
346 std::vector<std::pair<std::string, ImportantDomainInfo>> items( | |
347 important_info.begin(), important_info.end()); | |
348 std::sort(items.begin(), items.end(), &CompareDescendingImportantInfo); | |
349 | |
350 std::vector<ImportantDomainInfo> final_list; | |
351 for (std::pair<std::string, ImportantDomainInfo>& domain_info : items) { | |
352 if (final_list.size() >= max_results) | |
353 return final_list; | |
354 if (blacklisted_domains.find(domain_info.first) != | |
355 blacklisted_domains.end()) { | |
356 continue; | |
357 } | |
358 final_list.push_back(domain_info.second); | |
359 RECORD_UMA_FOR_IMPORTANT_REASON( | |
360 "Storage.ImportantSites.GeneratedReason", | |
361 "Storage.ImportantSites.GeneratedReasonCount", | |
362 domain_info.second.reason_bitfield); | |
363 } | |
364 | |
365 return final_list; | |
366 } | |
367 | |
368 void ImportantSitesUtil::RecordBlacklistedAndIgnoredImportantSites( | |
369 Profile* profile, | |
370 const std::vector<std::string>& blacklisted_sites, | |
371 const std::vector<int32_t>& blacklisted_sites_reason_bitfield, | |
372 const std::vector<std::string>& ignored_sites, | |
373 const std::vector<int32_t>& ignored_sites_reason_bitfield) { | |
374 // First, record the metrics for blacklisted and ignored sites. | |
375 for (int32_t reason_bitfield : blacklisted_sites_reason_bitfield) { | |
376 RECORD_UMA_FOR_IMPORTANT_REASON( | |
377 "Storage.ImportantSites.CBDChosenReason", | |
378 "Storage.ImportantSites.CBDChosenReasonCount", reason_bitfield); | |
379 } | |
380 for (int32_t reason_bitfield : ignored_sites_reason_bitfield) { | |
381 RECORD_UMA_FOR_IMPORTANT_REASON( | |
382 "Storage.ImportantSites.CBDIgnoredReason", | |
383 "Storage.ImportantSites.CBDIgnoredReasonCount", reason_bitfield); | |
384 } | |
385 | |
386 // We use the ignored sites to update our important sites blacklist. | |
387 HostContentSettingsMap* map = | |
388 HostContentSettingsMapFactory::GetForProfile(profile); | |
389 for (const std::string& ignored_site : ignored_sites) { | |
390 GURL origin("http://" + ignored_site); | |
391 std::unique_ptr<base::Value> value = map->GetWebsiteSetting( | |
392 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", nullptr); | |
393 | |
394 std::unique_ptr<base::DictionaryValue> dict = | |
395 base::DictionaryValue::From(map->GetWebsiteSetting( | |
396 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
397 nullptr)); | |
398 | |
399 int times_ignored = 0; | |
400 if (dict) | |
401 dict->GetInteger(kNumTimesIgnoredName, ×_ignored); | |
402 else | |
403 dict = base::MakeUnique<base::DictionaryValue>(); | |
404 dict->SetInteger(kNumTimesIgnoredName, ++times_ignored); | |
405 | |
406 map->SetWebsiteSettingDefaultScope( | |
407 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
408 std::move(dict)); | |
409 } | |
410 | |
411 // We clear our blacklist for sites that the user chose. | |
412 for (const std::string& ignored_site : blacklisted_sites) { | |
413 GURL origin("http://" + ignored_site); | |
414 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue()); | |
415 dict->SetInteger(kNumTimesIgnoredName, 0); | |
416 map->SetWebsiteSettingDefaultScope( | |
417 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
418 std::move(dict)); | |
419 } | |
420 | |
421 // Finally, record our old crossed-stats. | |
422 // Note: we don't plan on adding new metrics here, this is just for the finch | |
423 // experiment to give us initial data on what signals actually mattered. | |
424 for (int32_t reason_bitfield : blacklisted_sites_reason_bitfield) { | |
425 UMA_HISTOGRAM_ENUMERATION("Storage.BlacklistedImportantSites.Reason", | |
426 GetCrossedReasonFromBitfield(reason_bitfield), | |
427 CROSSED_REASON_BOUNDARY); | |
428 } | |
429 } | |
430 | |
431 void ImportantSitesUtil::MarkOriginAsImportantForTesting(Profile* profile, | |
432 const GURL& origin) { | |
433 // First get data from site engagement. | |
434 SiteEngagementService* site_engagement_service = | |
435 SiteEngagementService::Get(profile); | |
436 site_engagement_service->ResetScoreForURL( | |
437 origin, SiteEngagementScore::GetMediumEngagementBoundary()); | |
438 DCHECK(site_engagement_service->IsEngagementAtLeast( | |
439 origin, SiteEngagementService::ENGAGEMENT_LEVEL_MEDIUM)); | |
440 } | |
OLD | NEW |