Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(261)

Unified Diff: webkit/child/site_isolation_policy.cc

Issue 22254005: UMA data collector for cross-site documents(XSD) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr
Patch Set: switched to using UMA_HISTOGRAM_ENUMERATION from COUNTS Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webkit/child/site_isolation_policy.cc
diff --git a/webkit/child/site_isolation_policy.cc b/webkit/child/site_isolation_policy.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c8e7984885f651b59ad0b630acb39768376081e9
--- /dev/null
+++ b/webkit/child/site_isolation_policy.cc
@@ -0,0 +1,512 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "webkit/child/site_isolation_policy.h"
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/metrics/histogram.h"
+#include "base/strings/string_util.h"
+#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
+#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
+#include "third_party/WebKit/public/platform/WebString.h"
+#include "third_party/WebKit/public/platform/WebURL.h"
+#include "third_party/WebKit/public/platform/WebURLRequest.h"
+#include "third_party/WebKit/public/platform/WebURLResponse.h"
+#include "third_party/WebKit/public/web/WebDocument.h"
+#include "third_party/WebKit/public/web/WebFrame.h"
+#include "third_party/WebKit/public/web/WebFrameClient.h"
+#include "third_party/WebKit/public/web/WebSecurityOrigin.h"
+
+using base::strncasecmp;
+using WebKit::WebURLResponse;
+using WebKit::WebURLRequest;
+using WebKit::WebURL;
+using WebKit::WebString;
+using WebKit::WebDocument;
Charlie Reis 2013/08/07 21:02:02 These should be alphabetized.
dsjang 2013/08/08 21:21:01 Done.
+
namespace webkit_glue {

// Maps a resource load's identifier to the target type recorded by
// WillSendRequest(), consumed in DidReceiveResponse().
std::map<unsigned, WebURLRequest::TargetType>
    SiteIsolationPolicy::id_target_map_;
// Maps a response URL (its spec string) to the metadata captured in
// DidReceiveResponse(), consumed when the first payload chunk arrives
// in DidReceiveData().
std::map<std::string, ResponseMetaData>
    SiteIsolationPolicy::url_responsedata_map_;
// Maps an identifier back to its response URL so the entry above can
// be cleaned up in DidFinishResourceLoad().
std::map<unsigned, std::string> SiteIsolationPolicy::id_url_map_;
+
+void SiteIsolationPolicy::WillSendRequest(
+ unsigned identifier,
+ WebURLRequest::TargetType target_type) {
+ // This happens when the original request is redirected.
+ if (id_target_map_.count(identifier) != 0) {
+ // This check actually can fail. If it is, which target_type do we
+ // have to record between the old one and the new one? When
+ // redirection happens, target_type becomes 2. TODO(dsjang):
+ // let's disable this code and see what happens on onclickads.com
+ // for googleads JavaScript code assigned to an image. To disable
+ // this, we need a guarntee that target_type is always erased at
+ // the end of a transaction.
+ if (id_target_map_[identifier] != target_type) {
+ id_target_map_[identifier] = target_type;
Charlie Reis 2013/08/07 21:02:02 I can't understand this comment or code. It looks
dsjang 2013/08/08 21:21:01 Done.
+ }
+ }
+ id_target_map_[identifier] = target_type;
+}
+
// Inspects a freshly received response and, when it looks like a
// cross-site document (XSD) of an interesting MIME type, records its
// metadata so DidReceiveData() can sniff the payload and emit UMA
// stats. Benign cases — pending navigations, non-HTTP(S) schemes,
// same-site responses, CORS-approved responses, uninteresting MIME
// types — are filtered out and only logged.
void SiteIsolationPolicy::DidReceiveResponse(WebKit::WebFrame* frame,
                                             unsigned identifier,
                                             const WebURLResponse& response) {
  DCHECK(id_target_map_.count(identifier) == 1);

  // Baseline bucket: counts every response seen by this collector.
  UMA_HISTOGRAM_COUNTS("XSDP.ALL", 1);

  GURL response_url = response.url();
  // Consume the target type stashed by WillSendRequest().
  WebURLRequest::TargetType target_type = id_target_map_[identifier];
  id_target_map_.erase(identifier);

  // See if this is for navigation. If it is, let it pass.
  if (IsFrameNotCommitted(frame)) {
    LOG(INFO) << "SiteIsolationPolicy.FrameNotCommitted";
    return;
  }

  GURL frame_origin(frame->document().securityOrigin().toString().utf8());

  // TODO(dsjang): Find out all network related schemes here.
  if (!IsNetworkScheme(frame_origin)) {
    LOG(INFO) << "SiteIsolationPolicy.NotNetworkScheme:" << frame_origin;
    return;
  }

  if (IsSameSite(frame_origin, response_url)) {
    LOG(INFO) << "SiteIsolationPolicy.SameSite:" << frame_origin << ","
              << response_url;
    return;
  }

  ResponseMetaData::CanonicalMimeType canonical_mime_type =
      GetCanonicalMimeType(response);

  if (canonical_mime_type == ResponseMetaData::IsOthers) {
    LOG(INFO) << "SiteIsolationPolicy.mimetype:" << frame_origin << ","
              << response_url << ",[" << response.mimeType().utf8() << "]";
    return;
  }

  // There was a possibility that a CORS request preceded by a
  // pre-flight request does not have an "Access-Control-Allow-Origin"
  // header. But it turns out that every CORS response should carry the
  // header no matter what kind of CORS request it answers. Therefore,
  // if this is a CORS response, it has this header.
  std::string access_control_origin = response
      .httpHeaderField(
          WebKit::WebString::fromUTF8("Access-Control-Allow-Origin")).utf8();

  if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) {
    LOG(INFO) << "SiteIsolationPolicy.CorsIsSafe:";
    return;
  }

  // Real XSD data collection starts from here.
  LOG(INFO) << "SiteIsolationPolicy.XSD!!!:" << canonical_mime_type <<
      ":" << response_url;

  // TODO(dsjang): Apply X-Content-Type option here.
  ResponseMetaData resp_data;
  resp_data.frame_origin = frame_origin.spec();
  resp_data.response_url = response_url.spec();
  resp_data.identifier = identifier;
  resp_data.target_type = target_type;
  resp_data.canonical_mime_type = canonical_mime_type;
  resp_data.http_status_code = response.httpStatusCode();

  // Indexed both by URL (looked up in DidReceiveData) and by
  // identifier (for cleanup in DidFinishResourceLoad).
  url_responsedata_map_[resp_data.response_url] = resp_data;
  id_url_map_[identifier] = resp_data.response_url;

  return;
}
+
// Sniffs the first payload chunk of a response previously flagged as a
// potential cross-site document, and records UMA histograms about
// whether sniffing confirms the claimed MIME type and whether blocking
// the response would have been disruptive (judged by its HTTP status
// code). NOTE(review): there is a risk that the first chunk is too
// short for reliable sniffing — see the TODO below; this should also
// be noted in the .h file.
void SiteIsolationPolicy::DidReceiveData(const char* data,
                                         int length,
                                         WebURL& web_response_url) {
  // We only record XSDs whose content is actually non-zero.
  GURL response_url(web_response_url);

  // Responses not registered by DidReceiveResponse() were already
  // ruled benign; ignore them.
  std::string response_url_str = response_url.spec();
  if (url_responsedata_map_.count(response_url_str) == 0)
    return;

  DCHECK(url_responsedata_map_.count(response_url_str) == 1);
  ResponseMetaData resp_data = url_responsedata_map_[response_url_str];
  url_responsedata_map_.erase(response_url_str);

  // Record the length of the first received network packet to see if
  // it's enough for sniffing.
  UMA_HISTOGRAM_COUNTS("XSDP.XSD.DataLength", length);

  // Record the entire number of responses with a specific mime
  // type(text/html, text/xml, etc).
  UMA_HISTOGRAM_ENUMERATION("XSDP.XSD.MimeType",
                            resp_data.canonical_mime_type,
                            ResponseMetaData::IsOthers + 1);

  // TODO(dsjang): sometimes the length of payload can be not enough to do
  // correct content sniffing. If that happens, put it into a buffer
  // so that we can do it later.
  bool verified_for_blocking = false;
  ResponseMetaData::CanonicalMimeType sniffed_type =
      ResponseMetaData::IsOthers;

  switch (resp_data.canonical_mime_type) {
    // Record the number of responses whose content is sniffed for
    // what its mime type claims it to be. For example, we apply a
    // HTML sniffer for a document tagged with text/html here, and
    // increments the count of "XSDP.XSD.HTML.Verified".
    case ResponseMetaData::IsHTML:
      if (SniffForHTML(data, length)) {
        UMA_HISTOGRAM_COUNTS("XSDP.XSD.MimeType.HTML.Verified", 1);
        verified_for_blocking = true;
      }
      break;
    case ResponseMetaData::IsXML:
      if (SniffForXML(data, length)) {
        UMA_HISTOGRAM_COUNTS("XSDP.XSD.MimeType.XML.Verified", 1);
        verified_for_blocking = true;
      }
      break;
    case ResponseMetaData::IsJSON:
      if (SniffForJSON(data, length)) {
        UMA_HISTOGRAM_COUNTS("XSDP.XSD.MimeType.JSON.Verified", 1);
        verified_for_blocking = true;
      }
      break;
    case ResponseMetaData::IsPlain:
      // text/plain is ambiguous: try each sniffer in turn and record
      // which (if any) content type it actually looks like.
      if (SniffForHTML(data, length)) {
        sniffed_type = ResponseMetaData::IsHTML;
        verified_for_blocking = true;
      } else if (SniffForXML(data, length)) {
        sniffed_type = ResponseMetaData::IsXML;
        verified_for_blocking = true;
      } else if (SniffForJSON(data, length)) {
        sniffed_type = ResponseMetaData::IsJSON;
        verified_for_blocking = true;
      }
      UMA_HISTOGRAM_ENUMERATION("XSDP.XSD.MimeType.Plain.Verified",
                                sniffed_type,
                                ResponseMetaData::IsJSON + 1);
      break;
    case ResponseMetaData::IsOthers:
      // IsOthers responses are filtered out in DidReceiveResponse().
      DCHECK(false);
      break;
  }

  // We block these. See how many of them have unaffected status code.
  if (verified_for_blocking) {
    if (IsErrorStatusCode(resp_data.http_status_code)) {
      // This is a blocking that does not affect the browser behavior
      // by the following reasons : 1) this is not a binary object
      // (such as an image) since this is sniffed as a text
      // document. 2) then, this blocking only breaks the renderer
      // behavior only if it is either JavaScript or CSS. However, the
      // renderer doesn't use the contents of JS/CSS with unaffected
      // status code(e.g, 404). *) the renderer is expected not to use
      // the cross-site document content for purposes other than
      // JS/CSS (e.g, XHR).
      UMA_HISTOGRAM_COUNTS("XSDP.XSD.Blocked.ErrorStatusCode", 1);
    } else {
      // This is the case that a blocked response is with a non-error
      // status code, so this blocking can be actually disruptive.
      UMA_HISTOGRAM_ENUMERATION("XSDP.XSD.Blocked.NormalStatusCode",
          resp_data.target_type, WebURLRequest::TargetIsUnspecified + 1);
    }
  } else {
    LOG(INFO) << "Not Blocked:sniffing failed:";
    // Not blocked, but How many of them can be JS? This is only
    // useful for studying non-blocked documents.
    if (SniffForJS(data, length)) {
      UMA_HISTOGRAM_ENUMERATION("XSDP.XSD.NotBlocked.MaybeJS",
                                resp_data.target_type,
                                WebURLRequest::TargetIsUnspecified + 1);
    }
  }
}
+
+void SiteIsolationPolicy::DidFinishResourceLoad(unsigned identifier) {
+ id_target_map_.erase(identifier);
+ if (id_url_map_.count(identifier) > 0) {
+ url_responsedata_map_.erase(id_url_map_[identifier]);
+ id_url_map_.erase(identifier);
+ }
+}
+
+void SiteIsolationPolicy::DidFinishResourceLoadForUrl(
+ const WebKit::WebURL& web_response_url) {
+ GURL response_url(web_response_url);
+
+ if (url_responsedata_map_.count(response_url.spec()) > 0) {
+ ResponseMetaData meta_data = url_responsedata_map_[response_url.spec()];
+ url_responsedata_map_.erase(response_url.spec());
+ id_target_map_.erase(meta_data.identifier);
+ id_url_map_.erase(meta_data.identifier);
+ }
+}
+
+ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType(
+ const WebURLResponse& response) {
+ static const char TEXT_HTML[] = "text/html";
+ static const char TEXT_XML[] = "text/xml";
+ static const char APP_RSS_XML[] = "application/rss+xml";
+ static const char APP_XML[] = "application/xml";
+ static const char APP_JSON[] = "application/json";
+ static const char TEXT_XJSON[] = "text/x-json";
+ static const char TEXT_JSON[] = "text/json";
+ static const char TEXT_PLAIN[] = "text/json";
+
+ const std::string mime_type = response.mimeType().utf8();
+
+ LOG(ERROR) << "mimetype:" << mime_type << "==[" << TEXT_HTML << "]";
+
+ // These are a thorough list of the mime types crawled over the top
+ // 50k sites related to HTML, XML, JSON, Plain.
+ if (LowerCaseEqualsASCII(mime_type, TEXT_HTML)) {
+ return ResponseMetaData::IsHTML;
+ } else if (LowerCaseEqualsASCII(mime_type, TEXT_XML) ||
+ LowerCaseEqualsASCII(mime_type, APP_RSS_XML) ||
+ LowerCaseEqualsASCII(mime_type, APP_XML)) {
+ return ResponseMetaData::IsXML;
+ } else if (LowerCaseEqualsASCII(mime_type, APP_JSON) ||
+ LowerCaseEqualsASCII(mime_type, TEXT_XJSON) ||
+ LowerCaseEqualsASCII(mime_type, TEXT_JSON)) {
+ return ResponseMetaData::IsJSON;
+ } else if (LowerCaseEqualsASCII(mime_type, TEXT_PLAIN)) {
+ return ResponseMetaData::IsPlain;
+ } else {
+ return ResponseMetaData::IsOthers;
+ }
+}
+
// Returns true when |url| uses a scheme this policy can reason about
// (plain or secure HTTP).
bool SiteIsolationPolicy::IsNetworkScheme(GURL& url) {
  // We exclude ftp:// from here. FTP doesn't provide a Content-Type
  // header which our policy depends on, so we cannot protect any
  // document from FTP servers.
  return url.SchemeIs("http") || url.SchemeIs("https");
}
+
// Returns true when |frame_origin| and |response_url| share both a
// scheme and a registered domain (public suffix plus one) — i.e. they
// belong to the same "site" in the Site Isolation sense.
bool SiteIsolationPolicy::IsSameSite(GURL& frame_origin, GURL& response_url) {
  if (frame_origin.scheme() != response_url.scheme())
    return false;

  // Extract the effective domains (public suffix plus one) of the
  // urls.

  // TODO(dsjang): Is there any reason why we don't use
  // net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES
  // instead of
  // net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES? If
  // we allow sites to use their private registries, they can use
  // "finer grained" sites than only using public ones.
  std::string frame_domain =
      net::registry_controlled_domains::GetDomainAndRegistry(
          frame_origin,
          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
  std::string response_domain =
      net::registry_controlled_domains::GetDomainAndRegistry(
          response_url,
          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

  return frame_domain == response_domain;
}
+
// Returns true while |frame| has an in-flight navigation that has not
// yet been committed, so navigation responses can be exempted.
bool SiteIsolationPolicy::IsFrameNotCommitted(WebKit::WebFrame* frame) {
  // When a navigation starts, frame->provisionalDataSource() is set
  // to a not-null value which stands for the request made for the
  // navigation. As soon as the network request is committed to the
  // frame, frame->provisionalDataSource() is converted to null, and
  // the committed data source is moved to frame->dataSource(). This
  // is the most reliable way to detect whether the frame is in
  // navigation or not by far.
  return frame->provisionalDataSource() != NULL;
}
+
+bool SiteIsolationPolicy::IsValidCorsHeaderSet(
+ GURL& frame_origin,
+ GURL& website_origin,
+ std::string access_control_origin) {
+
+ size_t access_control_origin_len = access_control_origin.size();
+
+ // TODO(dsjang): Is this actually true? The server seems to return
+ // an empty string or "null".
+ if (access_control_origin_len == 0)
+ return false;
+
+ // Many websites are sending back "\"*\"" instead of "*". This is
+ // non-standard practice, and seems not supported by the
+ // brwoser. Refer to
+ // CrossOriginAccessControl::passesAccessControlCheck().
+
+ // TODO(dsjang): * is not allowed for the response from a request
+ // with cookies. This allows for more than what the renderer will
+ // eventually be able to receive, so we won't see illegal cross-site
+ // documents alllowed by this. We have to have t a way to see if
+ // this response is from a cookie-tagged request or not in the
+ // future.
+ if (access_control_origin == "*")
+ return true;
+
+ // TODO(dsjang): The CORS spec only treats a fully specified URL,
+ // except for "*", but many websites are using just a domain for
+ // access_control_origin, and this is blocked by Webkit's CORS logic
+ // here : CrossOriginAccessControl::passesAccessControlCheck()
+
+ // We don't use Webkit's existing CORS policy implementation since
+ // their policy works in terms of origins, not sites. For
+ // example, when frame is sub.a.com and it is not allowed to access
+ // a document with sub1.a.com. But under Site Isolation, it's
+ // allowed.
+
+ // TODO(dsjang): examine createFromString()'s behavior for a URL
+ // containing * in it.
+ WebKit::WebSecurityOrigin cors_security_origin =
+ WebKit::WebSecurityOrigin::createFromString(
+ WebKit::WebString::fromUTF8(access_control_origin));
+ GURL cors_origin(cors_security_origin.toString().utf8());
+
+ LOG(ERROR) << cors_security_origin.toString().utf8();
+ return IsSameSite(frame_origin, cors_origin);
+}
+
+bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) {
+ // TODO(dsjang): The content sniffer used by Chrome and Firefox are
+ // using "<!--" as one of the HTML signatures, but it also appears
+ // in valid JavaScript, considered as well-formed JS by the browser.
+ // Since we do not want to block any JS, we exclude it from our HTML
+ // signatures. This can weaken our document block policy, but we can
+ // break less websites.
+ const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec
+ "<script", // HTML5 spec, Mozilla
+ "<html", // HTML5 spec, Mozilla
+ "<head", // HTML5 spec, Mozilla
+ "<iframe", // Mozilla
+ "<h1", // Mozilla
+ "<div", // Mozilla
+ "<font", // Mozilla
+ "<table", // Mozilla
+ "<a", // Mozilla
+ "<style", // Mozilla
+ "<title", // Mozilla
+ "<b", // Mozilla
+ "<body", // Mozilla
+ "<br", "<p" // Mozilla
+ };
+ return DoSignatureMatching(
+ data, length, html_signatures, arraysize(html_signatures));
+}
+
+bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) {
+ const char* xml_signatures[] = {"<?xml" // Mozilla
+ };
+ return DoSignatureMatching(
+ data, length, xml_signatures, arraysize(xml_signatures));
+}
+
+bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) {
+ // TODO(dsjang): We have to come up with a better way to sniff
+ // JSON. However, even RE cannot help us that much due to the fact
+ // that we don't do full parsing. This DFA starts with state 0, and
+ // finds 1) {, 2) "or', 3) : in the order. This is intentionally not
+ // using a regular expression library so that we can make the
+ // trusted code base as small as possible. State 4 is a dead state.
+ const int INIT_ST = 0;
+ const int LBRACE_ST = 1;
+ const int LQUOTE_ST = 2;
+ const int COLON_ST = 3;
+ const int DEAD_ST = 4;
+
+ int state = INIT_ST;
+ for (size_t i = 0; i < length && state < COLON_ST; ++i, ++data) {
+ const char c = *data;
+ if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
+ continue;
+
+ switch (state) {
+ case INIT_ST:
+ if (c == '{')
+ state = LBRACE_ST;
+ else
+ state = DEAD_ST;
+ break;
+ case LBRACE_ST:
+ if (c == '\"' || c == '\'')
+ state = LQUOTE_ST;
+ else
+ state = DEAD_ST;
+ break;
+ case LQUOTE_ST:
+ if (c == ':') {
+ state = COLON_ST;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return state == COLON_ST;
+}
+
+bool SiteIsolationPolicy::DoSignatureMatching(const char* data,
+ size_t length,
+ const char* signatures[],
+ size_t arr_size) {
+ for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
+ const char* signature = signatures[sig_index];
+ size_t signature_length = strlen(signature);
+ size_t i = 0;
+ // Skip the white characters at the beginning of the document.
+ for (i = 0; i < length; ++i) {
+ char c = *data;
+ if (!(c == ' ' || c == '\r' || c == '\n' || c == '\t')) {
+ break;
+ }
+ ++data;
+ }
+ length = length - i;
+ if (length < signature_length)
+ continue;
+ if (!base::strncasecmp(signature, data, signature_length)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool SiteIsolationPolicy::IsErrorStatusCode(int status_code) {
+ // Chrome only uses the content of a response with one of these
+ // status codes for CSS/JavaScript. For images, Chrome just ignores
+ // status code.
+ const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302,
+ 303, 305, 306, 307};
+ for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
+ if (renderable_status_code[i] == status_code)
+ return false;
+ }
+ return true;
+}
+
+bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) {
+ // TODO(dsjang): This is a real hacking. The only purpose of this
+ // function is to try to see if there's any possibility that this
+ // data can be JavaScript.(superset of JS). This function will be
+ // removed for the production code.
+
+ // Search for "var " for JS detection. :-)
+ for (size_t i = 0; i < length - 3; ++i) {
+ if (strncmp(data, "var ", 4) == 0) {
+ return true;
+ }
+ ++data;
+ }
+ return false;
+}
+
+} // namespace webkit_glue

Powered by Google App Engine
This is Rietveld 408576698