| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/android/preferences/important_sites_util.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 #include <map> | |
| 9 #include <memory> | |
| 10 #include <set> | |
| 11 #include <utility> | |
| 12 | |
| 13 #include "base/containers/hash_tables.h" | |
| 14 #include "base/memory/ptr_util.h" | |
| 15 #include "base/metrics/histogram_macros.h" | |
| 16 #include "base/stl_util.h" | |
| 17 #include "base/time/time.h" | |
| 18 #include "base/values.h" | |
| 19 #include "chrome/browser/banners/app_banner_settings_helper.h" | |
| 20 #include "chrome/browser/bookmarks/bookmark_model_factory.h" | |
| 21 #include "chrome/browser/content_settings/host_content_settings_map_factory.h" | |
| 22 #include "chrome/browser/engagement/site_engagement_score.h" | |
| 23 #include "chrome/browser/engagement/site_engagement_service.h" | |
| 24 #include "chrome/browser/profiles/profile.h" | |
| 25 #include "components/bookmarks/browser/bookmark_model.h" | |
| 26 #include "components/content_settings/core/browser/host_content_settings_map.h" | |
| 27 #include "components/content_settings/core/common/content_settings.h" | |
| 28 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
| 29 #include "url/gurl.h" | |
| 30 | |
| 31 namespace { | |
| 32 using bookmarks::BookmarkModel; | |
| 33 using ImportantDomainInfo = ImportantSitesUtil::ImportantDomainInfo; | |
| 34 | |
// Dictionary key, inside the CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO website
// setting, that stores how many times the user has ignored a suggested site.
constexpr char kNumTimesIgnoredName[] = "NumTimesIgnored";

// A site that has been ignored at least this many times is treated as
// blacklisted and is no longer suggested as important.
constexpr int kTimesIgnoredForBlacklist = 3;

// These are the maximum # of bookmarks we can use as signals. If the user has
// <= kMaxBookmarks, then we just use those bookmarks. Otherwise we filter all
// bookmarks on site engagement > 0, sort, and trim to kMaxBookmarks.
// Unsigned because it is only ever compared with, or passed as, a container
// size.
constexpr size_t kMaxBookmarks = 5;
| 42 | |
// Signals that can make a registerable domain "important". Each value is also
// the bit position used for that signal in
// ImportantDomainInfo::reason_bitfield.
//
// Do not change the values here, as they are used for UMA histograms.
enum ImportantReason {
  // The origin reached the minimum site engagement level.
  ENGAGEMENT = 0,
  // The durable storage content setting is ALLOW for the origin.
  DURABLE = 1,
  // The user has a bookmark on the site.
  BOOKMARKS = 2,
  // The site has an app banner setting and was launched recently.
  HOME_SCREEN = 3,
  // The notifications content setting is ALLOW for the origin.
  NOTIFICATIONS = 4,
  // Number of reasons; must stay last.
  REASON_BOUNDARY
};
| 52 | |
// Records one |uma_name| sample for every ImportantReason bit that is set in
// |reason_bitfield|, followed by a single |uma_count_name| sample holding the
// number of bits that were set.
//
// This has to be a macro rather than a function: the histogram macros cache
// their histogram pointer after the first call at a given call site, so a
// shared function that is handed different histogram names would trip their
// consistency check.
#define RECORD_UMA_FOR_IMPORTANT_REASON(uma_name, uma_count_name,          \
                                        reason_bitfield)                   \
  do {                                                                     \
    const int32_t reasons_to_record = (reason_bitfield);                   \
    int num_reasons_set = 0;                                               \
    for (int bit = 0; bit < ImportantReason::REASON_BOUNDARY; ++bit) {     \
      if ((reasons_to_record & (1 << bit)) == 0)                           \
        continue;                                                          \
      ++num_reasons_set;                                                   \
      UMA_HISTOGRAM_ENUMERATION((uma_name), bit,                           \
                                ImportantReason::REASON_BOUNDARY);         \
    }                                                                      \
    UMA_HISTOGRAM_ENUMERATION((uma_count_name), num_reasons_set,           \
                              ImportantReason::REASON_BOUNDARY);           \
  } while (0)
| 71 | |
// Combination of the durable / notifications / engagement signals that was
// present for a site, as reported to the
// "Storage.BlacklistedImportantSites.Reason" histogram.
//
// Do not change the values here, as they are used for UMA histograms and
// testing in important_sites_util_unittest.
enum CrossedReason {
  // Exactly one signal.
  CROSSED_DURABLE = 0,
  CROSSED_NOTIFICATIONS = 1,
  CROSSED_ENGAGEMENT = 2,
  // Exactly two signals.
  CROSSED_NOTIFICATIONS_AND_ENGAGEMENT = 3,
  CROSSED_DURABLE_AND_ENGAGEMENT = 4,
  CROSSED_NOTIFICATIONS_AND_DURABLE = 5,
  // All three signals.
  CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT = 6,
  // None of the three signals.
  CROSSED_REASON_UNKNOWN = 7,
  // Number of values; must stay last.
  CROSSED_REASON_BOUNDARY
};
| 85 | |
| 86 CrossedReason GetCrossedReasonFromBitfield(int32_t reason_bitfield) { | |
| 87 bool durable = reason_bitfield & (1 << ImportantReason::DURABLE); | |
| 88 bool notifications = reason_bitfield & (1 << ImportantReason::NOTIFICATIONS); | |
| 89 bool engagement = reason_bitfield & (1 << ImportantReason::ENGAGEMENT); | |
| 90 if (durable && notifications && engagement) | |
| 91 return CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT; | |
| 92 else if (notifications && durable) | |
| 93 return CROSSED_NOTIFICATIONS_AND_DURABLE; | |
| 94 else if (notifications && engagement) | |
| 95 return CROSSED_NOTIFICATIONS_AND_ENGAGEMENT; | |
| 96 else if (durable && engagement) | |
| 97 return CROSSED_DURABLE_AND_ENGAGEMENT; | |
| 98 else if (notifications) | |
| 99 return CROSSED_NOTIFICATIONS; | |
| 100 else if (durable) | |
| 101 return CROSSED_DURABLE; | |
| 102 else if (engagement) | |
| 103 return CROSSED_ENGAGEMENT; | |
| 104 return CROSSED_REASON_UNKNOWN; | |
| 105 } | |
| 106 | |
| 107 std::string GetRegisterableDomainOrIP(const GURL& url) { | |
| 108 std::string registerable_domain = | |
| 109 net::registry_controlled_domains::GetDomainAndRegistry( | |
| 110 url, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); | |
| 111 if (registerable_domain.empty() && url.HostIsIPAddress()) | |
| 112 registerable_domain = url.host(); | |
| 113 return registerable_domain; | |
| 114 } | |
| 115 | |
| 116 void MaybePopulateImportantInfoForReason( | |
| 117 const GURL& origin, | |
| 118 std::set<GURL>* visited_origins, | |
| 119 ImportantReason reason, | |
| 120 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
| 121 if (!origin.is_valid() || !visited_origins->insert(origin).second) | |
| 122 return; | |
| 123 std::string registerable_domain = GetRegisterableDomainOrIP(origin); | |
| 124 ImportantDomainInfo& info = (*output)[registerable_domain]; | |
| 125 info.reason_bitfield |= 1 << reason; | |
| 126 if (info.example_origin.is_empty()) { | |
| 127 info.registerable_domain = registerable_domain; | |
| 128 info.example_origin = origin; | |
| 129 } | |
| 130 } | |
| 131 | |
| 132 // Returns the score associated with the given reason. The order of | |
| 133 // ImportantReason does not need to correspond to the score order. The higher | |
| 134 // the score, the more important the reason is. | |
| 135 int GetScoreForReason(ImportantReason reason) { | |
| 136 switch (reason) { | |
| 137 case ImportantReason::ENGAGEMENT: | |
| 138 return 1 << 0; | |
| 139 case ImportantReason::DURABLE: | |
| 140 return 1 << 1; | |
| 141 case ImportantReason::BOOKMARKS: | |
| 142 return 1 << 2; | |
| 143 case ImportantReason::HOME_SCREEN: | |
| 144 return 1 << 3; | |
| 145 case ImportantReason::NOTIFICATIONS: | |
| 146 return 1 << 4; | |
| 147 case ImportantReason::REASON_BOUNDARY: | |
| 148 return 0; | |
| 149 } | |
| 150 return 0; | |
| 151 } | |
| 152 | |
| 153 int GetScoreForReasonsBitfield(int32_t reason_bitfield) { | |
| 154 int score = 0; | |
| 155 for (int i = 0; i < ImportantReason::REASON_BOUNDARY; i++) { | |
| 156 if ((reason_bitfield >> i) & 1) { | |
| 157 score += GetScoreForReason(static_cast<ImportantReason>(i)); | |
| 158 } | |
| 159 } | |
| 160 return score; | |
| 161 } | |
| 162 | |
| 163 // Returns if |a| has a higher score than |b|, so that when we sort the higher | |
| 164 // score is first. | |
| 165 bool CompareDescendingImportantInfo( | |
| 166 const std::pair<std::string, ImportantDomainInfo>& a, | |
| 167 const std::pair<std::string, ImportantDomainInfo>& b) { | |
| 168 int score_a = GetScoreForReasonsBitfield(a.second.reason_bitfield); | |
| 169 int score_b = GetScoreForReasonsBitfield(b.second.reason_bitfield); | |
| 170 int bitfield_diff = score_a - score_b; | |
| 171 if (bitfield_diff != 0) | |
| 172 return bitfield_diff > 0; | |
| 173 return a.second.engagement_score > b.second.engagement_score; | |
| 174 } | |
| 175 | |
| 176 base::hash_set<std::string> GetBlacklistedImportantDomains(Profile* profile) { | |
| 177 ContentSettingsForOneType content_settings_list; | |
| 178 HostContentSettingsMap* map = | |
| 179 HostContentSettingsMapFactory::GetForProfile(profile); | |
| 180 map->GetSettingsForOneType(CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, | |
| 181 content_settings::ResourceIdentifier(), | |
| 182 &content_settings_list); | |
| 183 base::hash_set<std::string> ignoring_domains; | |
| 184 for (const ContentSettingPatternSource& site : content_settings_list) { | |
| 185 GURL origin(site.primary_pattern.ToString()); | |
| 186 if (!origin.is_valid() || | |
| 187 base::ContainsKey(ignoring_domains, origin.host())) { | |
| 188 continue; | |
| 189 } | |
| 190 | |
| 191 std::unique_ptr<base::DictionaryValue> dict = | |
| 192 base::DictionaryValue::From(map->GetWebsiteSetting( | |
| 193 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
| 194 nullptr)); | |
| 195 | |
| 196 if (!dict) | |
| 197 continue; | |
| 198 | |
| 199 int times_ignored = 0; | |
| 200 if (!dict->GetInteger(kNumTimesIgnoredName, ×_ignored) || | |
| 201 times_ignored < kTimesIgnoredForBlacklist) { | |
| 202 continue; | |
| 203 } | |
| 204 | |
| 205 ignoring_domains.insert(origin.host()); | |
| 206 } | |
| 207 return ignoring_domains; | |
| 208 } | |
| 209 | |
| 210 void PopulateInfoMapWithSiteEngagement( | |
| 211 Profile* profile, | |
| 212 SiteEngagementService::EngagementLevel minimum_engagement, | |
| 213 std::map<GURL, double>* engagement_map, | |
| 214 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
| 215 SiteEngagementService* service = SiteEngagementService::Get(profile); | |
| 216 *engagement_map = service->GetScoreMap(); | |
| 217 // We can have multiple origins for a single domain, so we record the one | |
| 218 // with the highest engagement score. | |
| 219 for (const auto& url_engagement_pair : *engagement_map) { | |
| 220 if (!service->IsEngagementAtLeast(url_engagement_pair.first, | |
| 221 minimum_engagement)) { | |
| 222 continue; | |
| 223 } | |
| 224 std::string registerable_domain = | |
| 225 GetRegisterableDomainOrIP(url_engagement_pair.first); | |
| 226 ImportantDomainInfo& info = (*output)[registerable_domain]; | |
| 227 if (url_engagement_pair.second > info.engagement_score) { | |
| 228 info.registerable_domain = registerable_domain; | |
| 229 info.engagement_score = url_engagement_pair.second; | |
| 230 info.example_origin = url_engagement_pair.first; | |
| 231 info.reason_bitfield |= 1 << ImportantReason::ENGAGEMENT; | |
| 232 } | |
| 233 } | |
| 234 } | |
| 235 | |
| 236 void PopulateInfoMapWithContentTypeAllowed( | |
| 237 Profile* profile, | |
| 238 ContentSettingsType content_type, | |
| 239 ImportantReason reason, | |
| 240 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
| 241 // Grab our content settings list. | |
| 242 ContentSettingsForOneType content_settings_list; | |
| 243 HostContentSettingsMapFactory::GetForProfile(profile)->GetSettingsForOneType( | |
| 244 content_type, content_settings::ResourceIdentifier(), | |
| 245 &content_settings_list); | |
| 246 // Extract a set of urls, using the primary pattern. We don't handle | |
| 247 // wildcard patterns. | |
| 248 std::set<GURL> content_origins; | |
| 249 for (const ContentSettingPatternSource& site : content_settings_list) { | |
| 250 if (site.setting != CONTENT_SETTING_ALLOW) | |
| 251 continue; | |
| 252 MaybePopulateImportantInfoForReason(GURL(site.primary_pattern.ToString()), | |
| 253 &content_origins, reason, output); | |
| 254 } | |
| 255 } | |
| 256 | |
| 257 void PopulateInfoMapWithBookmarks( | |
| 258 Profile* profile, | |
| 259 const std::map<GURL, double>& engagement_map, | |
| 260 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
| 261 SiteEngagementService* service = SiteEngagementService::Get(profile); | |
| 262 BookmarkModel* model = | |
| 263 BookmarkModelFactory::GetForBrowserContextIfExists(profile); | |
| 264 if (!model) | |
| 265 return; | |
| 266 std::vector<BookmarkModel::URLAndTitle> untrimmed_bookmarks; | |
| 267 model->GetBookmarks(&untrimmed_bookmarks); | |
| 268 | |
| 269 // Process the bookmarks and optionally trim them if we have too many. | |
| 270 std::vector<BookmarkModel::URLAndTitle> result_bookmarks; | |
| 271 if (untrimmed_bookmarks.size() > kMaxBookmarks) { | |
| 272 std::copy_if(untrimmed_bookmarks.begin(), untrimmed_bookmarks.end(), | |
| 273 std::back_inserter(result_bookmarks), | |
| 274 [service](const BookmarkModel::URLAndTitle& entry) { | |
| 275 return service->IsEngagementAtLeast( | |
| 276 entry.url.GetOrigin(), | |
| 277 SiteEngagementService::ENGAGEMENT_LEVEL_LOW); | |
| 278 }); | |
| 279 std::sort(result_bookmarks.begin(), result_bookmarks.end(), | |
| 280 [&engagement_map](const BookmarkModel::URLAndTitle& a, | |
| 281 const BookmarkModel::URLAndTitle& b) { | |
| 282 double a_score = engagement_map.at(a.url.GetOrigin()); | |
| 283 double b_score = engagement_map.at(b.url.GetOrigin()); | |
| 284 return a_score > b_score; | |
| 285 }); | |
| 286 if (result_bookmarks.size() > kMaxBookmarks) | |
| 287 result_bookmarks.resize(kMaxBookmarks); | |
| 288 } else { | |
| 289 result_bookmarks = std::move(untrimmed_bookmarks); | |
| 290 } | |
| 291 | |
| 292 std::set<GURL> content_origins; | |
| 293 for (const BookmarkModel::URLAndTitle& bookmark : result_bookmarks) { | |
| 294 MaybePopulateImportantInfoForReason(bookmark.url, &content_origins, | |
| 295 ImportantReason::BOOKMARKS, output); | |
| 296 } | |
| 297 } | |
| 298 | |
| 299 void PopulateInfoMapWithHomeScreen( | |
| 300 Profile* profile, | |
| 301 base::hash_map<std::string, ImportantDomainInfo>* output) { | |
| 302 ContentSettingsForOneType content_settings_list; | |
| 303 HostContentSettingsMapFactory::GetForProfile(profile)->GetSettingsForOneType( | |
| 304 CONTENT_SETTINGS_TYPE_APP_BANNER, content_settings::ResourceIdentifier(), | |
| 305 &content_settings_list); | |
| 306 // Extract a set of urls, using the primary pattern. We don't handle | |
| 307 // wildcard patterns. | |
| 308 std::set<GURL> content_origins; | |
| 309 base::Time now = base::Time::Now(); | |
| 310 for (const ContentSettingPatternSource& site : content_settings_list) { | |
| 311 GURL origin(site.primary_pattern.ToString()); | |
| 312 if (!AppBannerSettingsHelper::WasLaunchedRecently(profile, origin, now)) | |
| 313 continue; | |
| 314 MaybePopulateImportantInfoForReason(origin, &content_origins, | |
| 315 ImportantReason::HOME_SCREEN, output); | |
| 316 } | |
| 317 } | |
| 318 | |
| 319 } // namespace | |
| 320 | |
| 321 std::vector<ImportantDomainInfo> | |
| 322 ImportantSitesUtil::GetImportantRegisterableDomains(Profile* profile, | |
| 323 size_t max_results) { | |
| 324 base::hash_map<std::string, ImportantDomainInfo> important_info; | |
| 325 std::map<GURL, double> engagement_map; | |
| 326 | |
| 327 PopulateInfoMapWithSiteEngagement( | |
| 328 profile, SiteEngagementService::ENGAGEMENT_LEVEL_MEDIUM, &engagement_map, | |
| 329 &important_info); | |
| 330 | |
| 331 PopulateInfoMapWithContentTypeAllowed( | |
| 332 profile, CONTENT_SETTINGS_TYPE_NOTIFICATIONS, | |
| 333 ImportantReason::NOTIFICATIONS, &important_info); | |
| 334 | |
| 335 PopulateInfoMapWithContentTypeAllowed( | |
| 336 profile, CONTENT_SETTINGS_TYPE_DURABLE_STORAGE, ImportantReason::DURABLE, | |
| 337 &important_info); | |
| 338 | |
| 339 PopulateInfoMapWithBookmarks(profile, engagement_map, &important_info); | |
| 340 | |
| 341 PopulateInfoMapWithHomeScreen(profile, &important_info); | |
| 342 | |
| 343 base::hash_set<std::string> blacklisted_domains = | |
| 344 GetBlacklistedImportantDomains(profile); | |
| 345 | |
| 346 std::vector<std::pair<std::string, ImportantDomainInfo>> items( | |
| 347 important_info.begin(), important_info.end()); | |
| 348 std::sort(items.begin(), items.end(), &CompareDescendingImportantInfo); | |
| 349 | |
| 350 std::vector<ImportantDomainInfo> final_list; | |
| 351 for (std::pair<std::string, ImportantDomainInfo>& domain_info : items) { | |
| 352 if (final_list.size() >= max_results) | |
| 353 return final_list; | |
| 354 if (blacklisted_domains.find(domain_info.first) != | |
| 355 blacklisted_domains.end()) { | |
| 356 continue; | |
| 357 } | |
| 358 final_list.push_back(domain_info.second); | |
| 359 RECORD_UMA_FOR_IMPORTANT_REASON( | |
| 360 "Storage.ImportantSites.GeneratedReason", | |
| 361 "Storage.ImportantSites.GeneratedReasonCount", | |
| 362 domain_info.second.reason_bitfield); | |
| 363 } | |
| 364 | |
| 365 return final_list; | |
| 366 } | |
| 367 | |
| 368 void ImportantSitesUtil::RecordBlacklistedAndIgnoredImportantSites( | |
| 369 Profile* profile, | |
| 370 const std::vector<std::string>& blacklisted_sites, | |
| 371 const std::vector<int32_t>& blacklisted_sites_reason_bitfield, | |
| 372 const std::vector<std::string>& ignored_sites, | |
| 373 const std::vector<int32_t>& ignored_sites_reason_bitfield) { | |
| 374 // First, record the metrics for blacklisted and ignored sites. | |
| 375 for (int32_t reason_bitfield : blacklisted_sites_reason_bitfield) { | |
| 376 RECORD_UMA_FOR_IMPORTANT_REASON( | |
| 377 "Storage.ImportantSites.CBDChosenReason", | |
| 378 "Storage.ImportantSites.CBDChosenReasonCount", reason_bitfield); | |
| 379 } | |
| 380 for (int32_t reason_bitfield : ignored_sites_reason_bitfield) { | |
| 381 RECORD_UMA_FOR_IMPORTANT_REASON( | |
| 382 "Storage.ImportantSites.CBDIgnoredReason", | |
| 383 "Storage.ImportantSites.CBDIgnoredReasonCount", reason_bitfield); | |
| 384 } | |
| 385 | |
| 386 // We use the ignored sites to update our important sites blacklist. | |
| 387 HostContentSettingsMap* map = | |
| 388 HostContentSettingsMapFactory::GetForProfile(profile); | |
| 389 for (const std::string& ignored_site : ignored_sites) { | |
| 390 GURL origin("http://" + ignored_site); | |
| 391 std::unique_ptr<base::Value> value = map->GetWebsiteSetting( | |
| 392 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", nullptr); | |
| 393 | |
| 394 std::unique_ptr<base::DictionaryValue> dict = | |
| 395 base::DictionaryValue::From(map->GetWebsiteSetting( | |
| 396 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
| 397 nullptr)); | |
| 398 | |
| 399 int times_ignored = 0; | |
| 400 if (dict) | |
| 401 dict->GetInteger(kNumTimesIgnoredName, ×_ignored); | |
| 402 else | |
| 403 dict = base::MakeUnique<base::DictionaryValue>(); | |
| 404 dict->SetInteger(kNumTimesIgnoredName, ++times_ignored); | |
| 405 | |
| 406 map->SetWebsiteSettingDefaultScope( | |
| 407 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
| 408 std::move(dict)); | |
| 409 } | |
| 410 | |
| 411 // We clear our blacklist for sites that the user chose. | |
| 412 for (const std::string& ignored_site : blacklisted_sites) { | |
| 413 GURL origin("http://" + ignored_site); | |
| 414 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue()); | |
| 415 dict->SetInteger(kNumTimesIgnoredName, 0); | |
| 416 map->SetWebsiteSettingDefaultScope( | |
| 417 origin, origin, CONTENT_SETTINGS_TYPE_IMPORTANT_SITE_INFO, "", | |
| 418 std::move(dict)); | |
| 419 } | |
| 420 | |
| 421 // Finally, record our old crossed-stats. | |
| 422 // Note: we don't plan on adding new metrics here, this is just for the finch | |
| 423 // experiment to give us initial data on what signals actually mattered. | |
| 424 for (int32_t reason_bitfield : blacklisted_sites_reason_bitfield) { | |
| 425 UMA_HISTOGRAM_ENUMERATION("Storage.BlacklistedImportantSites.Reason", | |
| 426 GetCrossedReasonFromBitfield(reason_bitfield), | |
| 427 CROSSED_REASON_BOUNDARY); | |
| 428 } | |
| 429 } | |
| 430 | |
| 431 void ImportantSitesUtil::MarkOriginAsImportantForTesting(Profile* profile, | |
| 432 const GURL& origin) { | |
| 433 // First get data from site engagement. | |
| 434 SiteEngagementService* site_engagement_service = | |
| 435 SiteEngagementService::Get(profile); | |
| 436 site_engagement_service->ResetScoreForURL( | |
| 437 origin, SiteEngagementScore::GetMediumEngagementBoundary()); | |
| 438 DCHECK(site_engagement_service->IsEngagementAtLeast( | |
| 439 origin, SiteEngagementService::ENGAGEMENT_LEVEL_MEDIUM)); | |
| 440 } | |
| OLD | NEW |