Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(631)

Side by Side Diff: chrome/browser/extensions/activity_log/activity_actions.cc

Issue 292313006: Improve activity log ad metrics (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/extensions/activity_log/activity_actions.h" 5 #include "chrome/browser/extensions/activity_log/activity_actions.h"
6 6
7 #include <algorithm> // for std::find. 7 #include <algorithm> // for std::find.
8 #include <string> 8 #include <string>
9 9
10 #include "base/command_line.h" 10 #include "base/command_line.h"
11 #include "base/format_macros.h" 11 #include "base/format_macros.h"
12 #include "base/json/json_string_value_serializer.h" 12 #include "base/json/json_string_value_serializer.h"
13 #include "base/logging.h" 13 #include "base/logging.h"
14 #include "base/macros.h" 14 #include "base/macros.h"
15 #include "base/memory/singleton.h" 15 #include "base/memory/singleton.h"
16 #include "base/metrics/histogram.h"
16 #include "base/strings/string_number_conversions.h" 17 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h" 18 #include "base/strings/string_util.h"
18 #include "base/strings/stringprintf.h" 19 #include "base/strings/stringprintf.h"
19 #include "base/values.h" 20 #include "base/values.h"
20 #include "chrome/browser/extensions/activity_log/activity_action_constants.h" 21 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
21 #include "chrome/browser/extensions/activity_log/ad_network_database.h" 22 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
22 #include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h" 23 #include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h"
23 #include "chrome/browser/ui/browser.h" 24 #include "chrome/browser/ui/browser.h"
24 #include "chrome/common/chrome_switches.h" 25 #include "chrome/common/chrome_switches.h"
25 #include "components/rappor/rappor_service.h" 26 #include "components/rappor/rappor_service.h"
26 #include "content/public/browser/web_contents.h" 27 #include "content/public/browser/web_contents.h"
27 #include "extensions/common/ad_injection_constants.h" 28 #include "extensions/common/ad_injection_constants.h"
28 #include "extensions/common/constants.h" 29 #include "extensions/common/constants.h"
29 #include "extensions/common/dom_action_types.h" 30 #include "extensions/common/dom_action_types.h"
30 #include "sql/statement.h" 31 #include "sql/statement.h"
31 #include "url/gurl.h" 32 #include "url/gurl.h"
32 33
33 namespace constants = activity_log_constants; 34 namespace constants = activity_log_constants;
34 35
35 namespace extensions { 36 namespace extensions {
36 37
37 namespace { 38 namespace {
38 39
39 namespace keys = ad_injection_constants::keys; 40 namespace keys = ad_injection_constants::keys;
40 41
41 // The list of APIs for which we upload the URL to RAPPOR. 42 // The list of APIs for which we upload the URL to RAPPOR.
42 const char* kApisForRapporMetric[] = { 43 const char* kApisForRapporMetric[] = {
43 "HTMLIFrameElement.src", 44 ad_injection_constants::kHtmlIframeSrcApiName,
44 "HTMLEmbedElement.src", 45 ad_injection_constants::kHtmlEmbedSrcApiName,
45 "HTMLAnchorElement.href", 46 ad_injection_constants::kHtmlAnchorHrefApiName
46 }; 47 };
47 48
48 const char* kExtensionAdInjectionRapporMetricName = 49 const char* kExtensionAdInjectionRapporMetricName =
49 "Extensions.PossibleAdInjection"; 50 "Extensions.PossibleAdInjection";
50 51
51 // The elements for which we check the 'src' attribute to look for ads. 52 // The names of different types of HTML elements we check for ad injection.
52 const char* kSrcElements[] = { 53 const char* kIframeElementType = "HTMLIFrameElement";
53 "HTMLIFrameElement", 54 const char* kEmbedElementType = "HTMLEmbedElement";
54 "HTMLEmbedElement" 55 const char* kAnchorElementType = "HTMLAnchorElement";
55 };
56
57 // The elements for which we check the 'href' attribute to look for ads.
58 const char* kHrefElements[] = {
59 "HTMLAnchorElement",
60 };
61
62 bool IsSrcElement(const std::string& str) {
63 static const char** end = kSrcElements + arraysize(kSrcElements);
64 return std::find(kSrcElements, end, str) != end;
65 }
66
67 bool IsHrefElement(const std::string& str) {
68 static const char** end = kHrefElements + arraysize(kHrefElements);
69 return std::find(kHrefElements, end, str) != end;
70 }
71 56
72 std::string Serialize(const base::Value* value) { 57 std::string Serialize(const base::Value* value) {
73 std::string value_as_text; 58 std::string value_as_text;
74 if (!value) { 59 if (!value) {
75 value_as_text = "null"; 60 value_as_text = "null";
76 } else { 61 } else {
77 JSONStringValueSerializer serializer(&value_as_text); 62 JSONStringValueSerializer serializer(&value_as_text);
78 serializer.SerializeAndOmitBinaryValues(*value); 63 serializer.SerializeAndOmitBinaryValues(*value);
79 } 64 }
80 return value_as_text; 65 return value_as_text;
81 } 66 }
82 67
83 Action::InjectionType CheckDomObject(const base::DictionaryValue* object) {
84 std::string type;
85 object->GetString(keys::kType, &type);
86
87 std::string url_key;
88 if (IsSrcElement(type))
89 url_key = keys::kSrc;
90 else if (IsHrefElement(type))
91 url_key = keys::kHref;
92
93 if (!url_key.empty()) {
94 std::string url;
95 if (object->GetString(url_key, &url)) {
96 GURL gurl(url);
97 if (AdNetworkDatabase::Get()->IsAdNetwork(gurl))
98 return Action::INJECTION_NEW_AD;
99 // If the extension injected an URL which is not local to itself, there is
100 // a good chance it could be a new ad, and our database missed it.
101 // This could be noisier than other metrics, because there are perfectly
102 // acceptable uses for this, like "Show my mail".
103 if (gurl.is_valid() &&
104 !gurl.is_empty() &&
105 !gurl.SchemeIs(kExtensionScheme)) {
106 return Action::INJECTION_LIKELY_NEW_AD;
107 }
108 }
109 }
110
111 const base::ListValue* children = NULL;
112 if (object->GetList(keys::kChildren, &children)) {
113 const base::DictionaryValue* child = NULL;
114 for (size_t i = 0;
115 i < children->GetSize() &&
116 i < ad_injection_constants::kMaximumChildrenToCheck;
117 ++i) {
118 if (children->GetDictionary(i, &child)) {
119 Action::InjectionType type = CheckDomObject(child);
120 if (type != Action::NO_AD_INJECTION)
121 return type;
122 }
123 }
124 }
125
126 return Action::NO_AD_INJECTION;
127 }
128
129 } // namespace 68 } // namespace
130 69
131 using api::activity_log_private::ExtensionActivity; 70 using api::activity_log_private::ExtensionActivity;
132 71
133 Action::Action(const std::string& extension_id, 72 Action::Action(const std::string& extension_id,
134 const base::Time& time, 73 const base::Time& time,
135 const ActionType action_type, 74 const ActionType action_type,
136 const std::string& api_name, 75 const std::string& api_name,
137 int64 action_id) 76 int64 action_id)
138 : extension_id_(extension_id), 77 : extension_id_(extension_id),
(...skipping 23 matching lines...) Expand all
162 clone->set_arg_incognito(arg_incognito()); 101 clone->set_arg_incognito(arg_incognito());
163 if (other()) 102 if (other())
164 clone->set_other(make_scoped_ptr(other()->DeepCopy())); 103 clone->set_other(make_scoped_ptr(other()->DeepCopy()));
165 return clone; 104 return clone;
166 } 105 }
167 106
168 Action::InjectionType Action::DidInjectAd( 107 Action::InjectionType Action::DidInjectAd(
169 rappor::RapporService* rappor_service) const { 108 rappor::RapporService* rappor_service) const {
170 MaybeUploadUrl(rappor_service); 109 MaybeUploadUrl(rappor_service);
171 110
172 // Currently, we do not have the list of ad networks, so we exit immediately 111 // We should always have an AdNetworkDatabase, but, on the off chance we don't,
173 // with NO_AD_INJECTION (unless the database has been set by a test). 112 // don't crash in a release build.
174 if (!AdNetworkDatabase::Get()) 113 if (!AdNetworkDatabase::Get()) {
114 NOTREACHED();
175 return NO_AD_INJECTION; 115 return NO_AD_INJECTION;
176
177 if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName ||
178 api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName ||
179 api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName) {
180 return CheckSrcModification();
181 } else if (EndsWith(api_name_,
182 ad_injection_constants::kAppendChildApiSuffix,
183 true /* case sensitive */)) {
184 return CheckAppendChild();
185 } 116 }
186 117
187 return NO_AD_INJECTION; 118 AdType ad_type = AD_TYPE_NONE;
119 InjectionType injection_type = NO_AD_INJECTION;
120
121 if (EndsWith(api_name_,
122 ad_injection_constants::kAppendChildApiSuffix,
123 true /* case sensitive */)) {
124 injection_type = CheckAppendChild(&ad_type);
125 } else {
126 // Check if the action modified an element's src/href.
127 if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName)
128 ad_type = AD_TYPE_IFRAME;
129 else if (api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName)
130 ad_type = AD_TYPE_EMBED;
131 else if (api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName)
132 ad_type = AD_TYPE_ANCHOR;
133
134 if (ad_type != AD_TYPE_NONE)
135 injection_type = CheckSrcModification();
136 }
137
138 if (injection_type != NO_AD_INJECTION) {
139 UMA_HISTOGRAM_ENUMERATION(
140 "Extensions.AdInjection.Type", ad_type, Action::NUM_AD_TYPES);
141 }
142
143 return injection_type;
188 } 144 }
189 145
190 void Action::set_args(scoped_ptr<base::ListValue> args) { 146 void Action::set_args(scoped_ptr<base::ListValue> args) {
191 args_.reset(args.release()); 147 args_.reset(args.release());
192 } 148 }
193 149
194 base::ListValue* Action::mutable_args() { 150 base::ListValue* Action::mutable_args() {
195 if (!args_.get()) { 151 if (!args_.get()) {
196 args_.reset(new base::ListValue()); 152 args_.reset(new base::ListValue());
197 } 153 }
(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after
396 result += " ARG_URL=" + arg_url_.spec(); 352 result += " ARG_URL=" + arg_url_.spec();
397 } 353 }
398 if (other_.get()) { 354 if (other_.get()) {
399 result += " OTHER=" + Serialize(other_.get()); 355 result += " OTHER=" + Serialize(other_.get());
400 } 356 }
401 357
402 result += base::StringPrintf(" COUNT=%d", count_); 358 result += base::StringPrintf(" COUNT=%d", count_);
403 return result; 359 return result;
404 } 360 }
405 361
362 bool Action::UrlCouldBeAd(const GURL& url) const {
363 // Ads can only be valid urls that don't match the page's host (linking to the
364 // current page should be considered valid use), and aren't local to the
365 // extension.
366 return url.is_valid() &&
367 !url.is_empty() &&
368 url.host() != page_url_.host() &&
369 !url.SchemeIs(kExtensionScheme);
370 }
371
406 void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const { 372 void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
407 // If there's no given |rappor_service|, abort immediately. 373 // Don't bother recording if the url is innocuous (or no |rappor_service|).
408 if (!rappor_service) 374 if (!rappor_service || !UrlCouldBeAd(arg_url_))
409 return;
410
411 // If the action has no url, or the url is empty, then return.
412 if (!arg_url_.is_valid() || arg_url_.is_empty())
413 return;
414 std::string host = arg_url_.host();
415 if (host.empty())
416 return; 375 return;
417 376
418 bool can_inject_ads = false; 377 bool can_inject_ads = false;
419 for (size_t i = 0; i < arraysize(kApisForRapporMetric); ++i) { 378 for (size_t i = 0; i < arraysize(kApisForRapporMetric); ++i) {
420 if (api_name_ == kApisForRapporMetric[i]) { 379 if (api_name_ == kApisForRapporMetric[i]) {
421 can_inject_ads = true; 380 can_inject_ads = true;
422 break; 381 break;
423 } 382 }
424 } 383 }
425 384
426 if (!can_inject_ads) 385 if (!can_inject_ads)
427 return; 386 return;
428 387
429 // Record the URL - an ad *may* have been injected. 388 // Record the URL - an ad *may* have been injected.
430 rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName, 389 rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName,
431 rappor::ETLD_PLUS_ONE_RAPPOR_TYPE, 390 rappor::ETLD_PLUS_ONE_RAPPOR_TYPE,
432 host); 391 arg_url_.host());
433 } 392 }
434 393
435 Action::InjectionType Action::CheckSrcModification() const { 394 Action::InjectionType Action::CheckSrcModification() const {
436 const AdNetworkDatabase* database = AdNetworkDatabase::Get(); 395 const AdNetworkDatabase* database = AdNetworkDatabase::Get();
437 396
438 bool arg_url_valid = arg_url_.is_valid() && !arg_url_.is_empty(); 397 bool arg_url_could_be_ad = UrlCouldBeAd(arg_url_);
439 398
440 GURL prev_url; 399 GURL prev_url;
441 std::string prev_url_string; 400 std::string prev_url_string;
442 if (args_.get() && args_->GetString(1u, &prev_url_string)) 401 if (args_.get() && args_->GetString(1u, &prev_url_string))
443 prev_url = GURL(prev_url_string); 402 prev_url = GURL(prev_url_string);
444 403
445 bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty(); 404 bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty();
446 405
447 bool injected_ad = arg_url_valid && database->IsAdNetwork(arg_url_); 406 bool injected_ad = arg_url_could_be_ad && database->IsAdNetwork(arg_url_);
448 bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url); 407 bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url);
449 408
450 if (injected_ad && replaced_ad) 409 if (injected_ad && replaced_ad)
451 return INJECTION_REPLACED_AD; 410 return INJECTION_REPLACED_AD;
452 if (injected_ad) 411 if (injected_ad)
453 return INJECTION_NEW_AD; 412 return INJECTION_NEW_AD;
454 if (replaced_ad) 413 if (replaced_ad)
455 return INJECTION_REMOVED_AD; 414 return INJECTION_REMOVED_AD;
456 415
457 // If the extension modified the URL with an external, valid URL then there's 416 // If the extension modified the URL with an external, valid URL then there's
458 // a good chance it's ad injection. Log it as a likely one, which also helps 417 // a good chance it's ad injection. Log it as a likely one, which also helps
459 // us determine the effectiveness of our IsAdNetwork() recognition. 418 // us determine the effectiveness of our IsAdNetwork() recognition.
460 if (arg_url_valid && !arg_url_.SchemeIs(kExtensionScheme)) { 419 if (arg_url_could_be_ad) {
461 if (prev_url_valid) 420 if (prev_url_valid)
462 return INJECTION_LIKELY_REPLACED_AD; 421 return INJECTION_LIKELY_REPLACED_AD;
463 return INJECTION_LIKELY_NEW_AD; 422 return INJECTION_LIKELY_NEW_AD;
464 } 423 }
465 424
466 return NO_AD_INJECTION; 425 return NO_AD_INJECTION;
467 } 426 }
468 427
469 Action::InjectionType Action::CheckAppendChild() const { 428 Action::InjectionType Action::CheckAppendChild(AdType* ad_type_out) const {
470 const base::DictionaryValue* child = NULL; 429 const base::DictionaryValue* child = NULL;
471 if (!args_->GetDictionary(0u, &child)) 430 if (!args_->GetDictionary(0u, &child))
472 return NO_AD_INJECTION; 431 return NO_AD_INJECTION;
473 432
474 return CheckDomObject(child); 433 return CheckDomObject(child, ad_type_out);
434 }
435
436 Action::InjectionType Action::CheckDomObject(
437 const base::DictionaryValue* object,
438 AdType* ad_type_out) const {
439 DCHECK(ad_type_out);
440 std::string type;
441 object->GetString(keys::kType, &type);
442
443 AdType ad_type = AD_TYPE_NONE;
444 std::string url_key;
445 if (type == kIframeElementType) {
446 ad_type = AD_TYPE_IFRAME;
447 url_key = keys::kSrc;
448 } else if (type == kEmbedElementType) {
449 ad_type = AD_TYPE_EMBED;
450 url_key = keys::kSrc;
451 } else if (type == kAnchorElementType) {
452 ad_type = AD_TYPE_ANCHOR;
453 url_key = keys::kHref;
454 }
455
456 if (!url_key.empty()) {
457 std::string url;
458 if (object->GetString(url_key, &url)) {
459 GURL gurl(url);
460 if (UrlCouldBeAd(gurl)) {
461 *ad_type_out = ad_type;
462 if (AdNetworkDatabase::Get()->IsAdNetwork(gurl))
463 return INJECTION_NEW_AD;
464 // If the extension injected an URL which is not local to itself or the
465 // page, there is a good chance it could be a new ad, and our database
466 // missed it.
467 return INJECTION_LIKELY_NEW_AD;
468 }
469 }
470 }
471
472 const base::ListValue* children = NULL;
473 if (object->GetList(keys::kChildren, &children)) {
474 const base::DictionaryValue* child = NULL;
475 for (size_t i = 0;
476 i < children->GetSize() &&
477 i < ad_injection_constants::kMaximumChildrenToCheck;
478 ++i) {
479 if (children->GetDictionary(i, &child)) {
480 InjectionType type = CheckDomObject(child, ad_type_out);
481 if (type != NO_AD_INJECTION)
482 return type;
483 }
484 }
485 }
486
487 return NO_AD_INJECTION;
475 } 488 }
476 489
477 bool ActionComparator::operator()( 490 bool ActionComparator::operator()(
478 const scoped_refptr<Action>& lhs, 491 const scoped_refptr<Action>& lhs,
479 const scoped_refptr<Action>& rhs) const { 492 const scoped_refptr<Action>& rhs) const {
480 if (lhs->time() != rhs->time()) 493 if (lhs->time() != rhs->time())
481 return lhs->time() < rhs->time(); 494 return lhs->time() < rhs->time();
482 else if (lhs->action_id() != rhs->action_id()) 495 else if (lhs->action_id() != rhs->action_id())
483 return lhs->action_id() < rhs->action_id(); 496 return lhs->action_id() < rhs->action_id();
484 else 497 else
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
532 std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other()); 545 std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other());
533 if (lhs_other != rhs_other) 546 if (lhs_other != rhs_other)
534 return lhs_other < rhs_other; 547 return lhs_other < rhs_other;
535 } 548 }
536 549
537 // All fields compare as equal if this point is reached. 550 // All fields compare as equal if this point is reached.
538 return false; 551 return false;
539 } 552 }
540 553
541 } // namespace extensions 554 } // namespace extensions
OLDNEW
« no previous file with comments | « chrome/browser/extensions/activity_log/activity_actions.h ('k') | chrome/browser/extensions/activity_log/uma_policy.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698