content/child/site_isolation_policy.cc - Issue 22876029: Revert 219383 "UMA data collector for cross-site documents(XSD)"

Unified Diff: content/child/site_isolation_policy.cc

Issue 22876029: Revert 219383 "UMA data collector for cross-site documents(XSD)" (Closed) Base URL: svn://svn.chromium.org/chrome/branches/1610/src/

Patch Set: Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: content/child/site_isolation_policy.cc

===================================================================

--- content/child/site_isolation_policy.cc (revision 219467)

+++ content/child/site_isolation_policy.cc (working copy)

@@ -1,561 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "content/child/site_isolation_policy.h"

-#include "base/basictypes.h"

-#include "base/command_line.h"

-#include "base/logging.h"

-#include "base/metrics/histogram.h"

-#include "base/strings/string_util.h"

-#include "content/public/common/content_switches.h"

-#include "net/base/registry_controlled_domains/registry_controlled_domain.h"

-#include "net/http/http_response_headers.h"

-#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"

-#include "third_party/WebKit/public/platform/WebString.h"

-#include "third_party/WebKit/public/platform/WebURL.h"

-#include "third_party/WebKit/public/platform/WebURLRequest.h"

-#include "third_party/WebKit/public/platform/WebURLResponse.h"

-#include "third_party/WebKit/public/web/WebDocument.h"

-#include "third_party/WebKit/public/web/WebFrame.h"

-#include "third_party/WebKit/public/web/WebFrameClient.h"

-#include "third_party/WebKit/public/web/WebSecurityOrigin.h"

-using WebKit::WebDocument;

-using WebKit::WebString;

-using WebKit::WebURL;

-using WebKit::WebURLResponse;

-using WebKit::WebURLRequest;

-namespace content {

-namespace {

-// MIME types

-const char kTextHtml[] = "text/html";

-const char kTextXml[] = "text/xml";

-const char xAppRssXml[] = "application/rss+xml";

-const char kAppXml[] = "application/xml";

-const char kAppJson[] = "application/json";

-const char kTextJson[] = "text/json";

-const char kTextXjson[] = "text/x-json";

-const char kTextPlain[] = "text/plain";

-} // anonymous namespace

-SiteIsolationPolicy::ResponseMetaData::ResponseMetaData() {}

-void SiteIsolationPolicy::OnReceivedResponse(

- int request_id,

- GURL& frame_origin,

- GURL& response_url,

- ResourceType::Type resource_type,

- const webkit_glue::ResourceResponseInfo& info) {

- UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);

- // See if this is for navigation. If it is, don't block it, under the

- // assumption that we will put it in an appropriate process.

- if (ResourceType::IsFrame(resource_type))

- return;

- if (!IsBlockableScheme(response_url))

- return;

- if (IsSameSite(frame_origin, response_url))

- return;

- SiteIsolationPolicy::ResponseMetaData::CanonicalMimeType canonical_mime_type =

- GetCanonicalMimeType(info.mime_type);

- if (canonical_mime_type == SiteIsolationPolicy::ResponseMetaData::Others)

- return;

- // Every CORS request should have the Access-Control-Allow-Origin header even

- // if it is preceded by a pre-flight request. Therefore, if this is a CORS

- // request, it has this header. response.httpHeaderField() internally uses

- // case-insensitive matching for the header name.

- std::string access_control_origin;

- // We can use a case-insensitive header name for EnumerateHeader().

- info.headers->EnumerateHeader(

- NULL, "access-control-allow-origin", &access_control_origin);

- if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin))

- return;

- // Real XSD data collection starts from here.

- std::string no_sniff;

- info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);

- ResponseMetaData resp_data;

- resp_data.frame_origin = frame_origin.spec();

- resp_data.response_url = response_url;

- resp_data.resource_type = resource_type;

- resp_data.canonical_mime_type = canonical_mime_type;

- resp_data.http_status_code = info.headers->response_code();

- resp_data.no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");

- RequestIdToMetaDataMap* metadata_map = GetRequestIdToMetaDataMap();

- (*metadata_map)[request_id] = resp_data;

-// These macros are defined here so that we prevent code size bloat-up due to

-// the UMA_HISTOGRAM_* macros. Similar logic is used for recording UMA stats for

-// different MIME types, but we cannot create a helper function for this since

-// UMA_HISTOGRAM_* macros do not accept variables as their bucket names. As a

-// solution, macros are used instead to capture the repeated pattern for

-// recording UMA stats. TODO(dsjang): this is only needed for collecting UMA

-// stat. Will be deleted when this class is used for actual blocking.

-#define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \

- UMA_HISTOGRAM_COUNTS( BUCKET_PREFIX ".Blocked", 1); \

- result = true; \

- if (renderable_status_code) { \

- UMA_HISTOGRAM_ENUMERATION( \

- BUCKET_PREFIX ".Blocked.RenderableStatusCode", \

- resp_data.resource_type, \

- WebURLRequest::TargetIsUnspecified + 1); \

- } else { \

- UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".Blocked.NonRenderableStatusCode",1);\

- }

-#define SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(BUCKET_PREFIX) \

- UMA_HISTOGRAM_COUNTS( BUCKET_PREFIX ".NoSniffBlocked", 1); \

- result = true; \

- if (renderable_status_code) { \

- UMA_HISTOGRAM_ENUMERATION( \

- BUCKET_PREFIX ".NoSniffBlocked.RenderableStatusCode", \

- resp_data.resource_type, \

- WebURLRequest::TargetIsUnspecified + 1); \

- } else { \

- UMA_HISTOGRAM_ENUMERATION( \

- BUCKET_PREFIX ".NoSniffBlocked.NonRenderableStatusCode", \

- resp_data.resource_type, \

- WebURLRequest::TargetIsUnspecified + 1); \

- }

-#define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \

- UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NotBlocked", 1); \

- if (is_sniffed_for_js) \

- UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NotBlocked.MaybeJS", 1); \

-#define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR,BUCKET_PREFIX) \

- if (SNIFF_EXPR) { \

- SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \

- } else { \

- if (resp_data.no_sniff) { \

- SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(BUCKET_PREFIX) \

- } else { \

- SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \

- } \

- }

-bool SiteIsolationPolicy::ShouldBlockResponse(

- int request_id,

- const char* data,

- int length,

- std::string* alternative_data) {

- RequestIdToMetaDataMap* metadata_map = GetRequestIdToMetaDataMap();

- RequestIdToResultMap* result_map = GetRequestIdToResultMap();

- // If there's an entry for |request_id| in blocked_map, this request's first

- // data packet has already been examined. We can return the result here.

- if (result_map->count(request_id) != 0) {

- if ((*result_map)[request_id]) {

- // Here, the blocking result has been set for the previous run of

- // ShouldBlockResponse(), so we set alternative data to an empty string so

- // that ResourceDispatcher doesn't call its peer's onReceivedData() with

- // the alternative data.

- alternative_data->erase();

- return true;

- }

- return false;

- }

- // If result_map doesn't have an entry for |request_id|, we're receiving the

- // first data packet for request_id. If request_id is not registered, this

- // request is identified as a non-target of our policy. So we return true.

- if (metadata_map->count(request_id) == 0) {

- // We set request_id to true so that we always return true for this request.

- (*result_map)[request_id] = false;

- return false;

- }

- // We now look at the first data packet received for request_id.

- ResponseMetaData resp_data = (*metadata_map)[request_id];

- metadata_map->erase(request_id);

- // Record the length of the first received network packet to see if it's

- // enough for sniffing.

- UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length);

- // Record the number of cross-site document responses with a specific mime

- // type (text/html, text/xml, etc).

- UMA_HISTOGRAM_ENUMERATION(

- "SiteIsolation.XSD.MimeType",

- resp_data.canonical_mime_type,

- SiteIsolationPolicy::ResponseMetaData::MaxCanonicalMimeType);

- // Store the result of cross-site document blocking analysis. True means we

- // can return this document to the renderer, false means that we have to block

- // the response data.

- bool result = false;

- // The content is blocked if it is sniffed for HTML/JSON/XML. When the blocked

- // response is with an error status code, it is not disruptive by the

- // following reasons : 1) the blocked content is not a binary object (such as

- // an image) since it is sniffed for text; 2) then, this blocking only breaks

- // the renderer behavior only if it is either JavaScript or CSS. However, the

- // renderer doesn't use the contents of JS/CSS with unaffected status code

- // (e.g, 404). 3) the renderer is expected not to use the cross-site document

- // content for purposes other than JS/CSS (e.g, XHR).

- bool renderable_status_code = IsRenderableStatusCodeForDocument(

- resp_data.http_status_code);

- // This is only used for false-negative analysis for non-blocked resources.

- bool is_sniffed_for_js = SniffForJS(data, length);

- // Record the number of responses whose content is sniffed for what its mime

- // type claims it to be. For example, we apply a HTML sniffer for a document

- // tagged with text/html here. Whenever this check becomes true, we'll block

- // the response.

- switch (resp_data.canonical_mime_type) {

- case SiteIsolationPolicy::ResponseMetaData::HTML:

- SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length),

- "SiteIsolation.XSD.HTML");

- break;

- case SiteIsolationPolicy::ResponseMetaData::XML:

- SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length),

- "SiteIsolation.XSD.XML");

- break;

- case SiteIsolationPolicy::ResponseMetaData::JSON:

- SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length),

- "SiteIsolation.XSD.JSON");

- break;

- case SiteIsolationPolicy::ResponseMetaData::Plain:

- if (SniffForHTML(data, length)) {

- SITE_ISOLATION_POLICY_COUNT_BLOCK(

- "SiteIsolation.XSD.Plain.HTML");

- } else if (SniffForXML(data, length)) {

- SITE_ISOLATION_POLICY_COUNT_BLOCK(

- "SiteIsolation.XSD.Plain.XML");

- } else if (SniffForJSON(data, length)) {

- SITE_ISOLATION_POLICY_COUNT_BLOCK(

- "SiteIsolation.XSD.Plain.JSON");

- } else if (is_sniffed_for_js) {

- if (resp_data.no_sniff) {

- SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(

- "SiteIsolation.XSD.Plain");

- } else {

- SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(

- "SiteIsolation.XSD.Plain");

- }

- break;

- default :

- NOTREACHED() <<

- "Not a blockable mime type. This mime type shouldn't reach here.";

- break;

- }

- const CommandLine& command_line = *CommandLine::ForCurrentProcess();

- if (!command_line.HasSwitch(switches::kBlockCrossSiteDocuments))

- result = false;

- (*result_map)[request_id] = result;

- if (result) {

- alternative_data->erase();

- alternative_data->insert(0, " ");

- LOG(ERROR) << resp_data.response_url

- << " is blocked as an illegal cross-site document from "

- << resp_data.frame_origin;

- }

- return result;

-#undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK

-#undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT

-#undef SITE_ISOLATION_POLICY_COUNT_BLOCK

-void SiteIsolationPolicy::OnRequestComplete(int request_id) {

- RequestIdToMetaDataMap* metadata_map = GetRequestIdToMetaDataMap();

- RequestIdToResultMap* result_map = GetRequestIdToResultMap();

- metadata_map->erase(request_id);

- result_map->erase(request_id);

-SiteIsolationPolicy::ResponseMetaData::CanonicalMimeType

-SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) {

- if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {

- return SiteIsolationPolicy::ResponseMetaData::HTML;

- }

- if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {

- return SiteIsolationPolicy::ResponseMetaData::Plain;

- }

- if (LowerCaseEqualsASCII(mime_type, kAppJson) ||

- LowerCaseEqualsASCII(mime_type, kTextJson) ||

- LowerCaseEqualsASCII(mime_type, kTextXjson)) {

- return SiteIsolationPolicy::ResponseMetaData::JSON;

- }

- if (LowerCaseEqualsASCII(mime_type, kTextXml) ||

- LowerCaseEqualsASCII(mime_type, xAppRssXml) ||

- LowerCaseEqualsASCII(mime_type, kAppXml)) {

- return SiteIsolationPolicy::ResponseMetaData::XML;

- }

- return SiteIsolationPolicy::ResponseMetaData::Others;

-bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) {

- // We exclude ftp:// from here. FTP doesn't provide a Content-Type

- // header which our policy depends on, so we cannot protect any

- // document from FTP servers.

- return url.SchemeIs("http") || url.SchemeIs("https");

-bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin,

- const GURL& response_url) {

- if (!frame_origin.is_valid() || !response_url.is_valid())

- return false;

- if (frame_origin.scheme() != response_url.scheme())

- return false;

- // SameDomainOrHost() extracts the effective domains (public suffix plus one)

- // from the two URLs and compare them.

- // TODO(dsjang): use INCLUDE_PRIVATE_REGISTRIES when http://crbug.com/7988 is

- // fixed.

- return net::registry_controlled_domains::SameDomainOrHost(

- frame_origin,

- response_url,

- net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

-bool SiteIsolationPolicy::IsFrameNavigating(WebKit::WebFrame* frame) {

- // When a navigation starts, frame->provisionalDataSource() is set

- // to a not-null value which stands for the request made for the

- // navigation. As soon as the network request is committed to the

- // frame, frame->provisionalDataSource() is converted to null, and

- // the committed data source is moved to frame->dataSource(). This

- // is the most reliable way to detect whether the frame is in

- // navigation or not.

- return frame->provisionalDataSource() != NULL;

-// We don't use Webkit's existing CORS policy implementation since

-// their policy works in terms of origins, not sites. For example,

-// when frame is sub.a.com and it is not allowed to access a document

-// with sub1.a.com. But under Site Isolation, it's allowed.

-bool SiteIsolationPolicy::IsValidCorsHeaderSet(

- GURL& frame_origin,

- GURL& website_origin,

- std::string access_control_origin) {

- // Many websites are sending back "\"*\"" instead of "*". This is

- // non-standard practice, and not supported by Chrome. Refer to

- // CrossOriginAccessControl::passesAccessControlCheck().

- // TODO(dsjang): * is not allowed for the response from a request

- // with cookies. This allows for more than what the renderer will

- // eventually be able to receive, so we won't see illegal cross-site

- // documents allowed by this. We have to find a way to see if this

- // response is from a cookie-tagged request or not in the future.

- if (access_control_origin == "*")

- return true;

- // TODO(dsjang): The CORS spec only treats a fully specified URL, except for

- // "*", but many websites are using just a domain for access_control_origin,

- // and this is blocked by Webkit's CORS logic here :

- // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set

- // is_valid() to false when it is created from a URL containing * in the

- // domain part.

- GURL cors_origin(access_control_origin);

- return IsSameSite(frame_origin, cors_origin);

-// This function is a slight modification of |net::SniffForHTML|.

-bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) {

- // The content sniffer used by Chrome and Firefox are using "<!--"

- // as one of the HTML signatures, but it also appears in valid

- // JavaScript, considered as well-formed JS by the browser. Since

- // we do not want to block any JS, we exclude it from our HTML

- // signatures. This can weaken our document block policy, but we can

- // break less websites.

- // TODO(dsjang): parameterize |net::SniffForHTML| with an option

- // that decides whether to include <!-- or not, so that we can

- // remove this function.

- const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec

- "<script", // HTML5 spec, Mozilla

- "<html", // HTML5 spec, Mozilla

- "<head", // HTML5 spec, Mozilla

- "<iframe", // Mozilla

- "<h1", // Mozilla

- "<div", // Mozilla

- "<font", // Mozilla

- "<table", // Mozilla

- "<a", // Mozilla

- "<style", // Mozilla

- "<title", // Mozilla

- "<b", // Mozilla

- "<body", // Mozilla

- "<br", "<p", // Mozilla

- "<?xml" // Mozilla

- };

- if (MatchesSignature(

- data, length, html_signatures, arraysize(html_signatures)))

- return true;

- // "" pair for

- // comments.

- const char* comment_begins[] = {"<!--" };

- if (MatchesSignature(

- data, length, comment_begins, arraysize(comment_begins))) {

- // Search for --> and do SniffForHTML after that. If we can find the

- // comment's end, we start HTML sniffing from there again.

- const char end_comment[] = "-->";

- const size_t end_comment_size = strlen(end_comment);

- for (size_t i = 0; i <= length - end_comment_size; ++i) {

- if (!strncmp(data + i, end_comment, end_comment_size)) {

- size_t skipped = i + end_comment_size;

- return SniffForHTML(data + skipped, length - skipped);

- }

- return false;

-bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) {

- // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for

- // this signature. However, XML is case-sensitive. Don't we have to

- // be more lenient only to block documents starting with the exact

- // string <?xml rather than <?XML ?

- const char* xml_signatures[] = {"<?xml" // Mozilla

- };

- return MatchesSignature(

- data, length, xml_signatures, arraysize(xml_signatures));

-bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) {

- // TODO(dsjang): We have to come up with a better way to sniff

- // JSON. However, even RE cannot help us that much due to the fact

- // that we don't do full parsing. This DFA starts with state 0, and

- // finds {, "/' and : in that order. We're avoiding adding a

- // dependency on a regular expression library.

- const int kInitState = 0;

- const int kLeftBraceState = 1;

- const int kLeftQuoteState = 2;

- const int kColonState = 3;

- const int kDeadState = 4;

- int state = kInitState;

- for (size_t i = 0; i < length && state < kColonState; ++i) {

- const char c = data[i];

- if (c == ' ' || c == '\t' || c == '\r' || c == '\n')

- continue;

- switch (state) {

- case kInitState:

- if (c == '{')

- state = kLeftBraceState;

- else

- state = kDeadState;

- break;

- case kLeftBraceState:

- if (c == '\"' || c == '\'')

- state = kLeftQuoteState;

- else

- state = kDeadState;

- break;

- case kLeftQuoteState:

- if (c == ':')

- state = kColonState;

- break;

- default:

- NOTREACHED();

- break;

- }

- return state == kColonState;

-bool SiteIsolationPolicy::MatchesSignature(const char* raw_data,

- size_t raw_length,

- const char* signatures[],

- size_t arr_size) {

- size_t start = 0;

- // Skip white characters at the beginning of the document.

- for (start = 0; start < raw_length; ++start) {

- char c = raw_data[start];

- if (!(c == ' ' || c == '\t' || c == '\r' || c == '\n'))

- break;

- }

- // There is no not-whitespace character in this document.

- if (!(start < raw_length))

- return false;

- const char* data = raw_data + start;

- size_t length = raw_length - start;

- for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {

- const char* signature = signatures[sig_index];

- size_t signature_length = strlen(signature);

- if (length < signature_length)

- continue;

- if (!base::strncasecmp(signature, data, signature_length))

- return true;

- }

- return false;

-bool SiteIsolationPolicy::IsRenderableStatusCodeForDocument(int status_code) {

- // Chrome only uses the content of a response with one of these status codes

- // for CSS/JavaScript. For images, Chrome just ignores status code.

- const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302,

- 303, 305, 306, 307};

- for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {

- if (renderable_status_code[i] == status_code)

- return true;

- }

- return false;

-bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) {

- // TODO(dsjang): This is a real hack. The only purpose of this function is to

- // try to see if there's any possibility that this data can be JavaScript

- // (superset of JS). This function will be removed once UMA stats are

- // gathered.

- // Search for "var " for JS detection.

- for (size_t i = 0; i < length - 3; ++i) {

- if (strncmp(data + i, "var ", 4) == 0)

- return true;

- }

- return false;

-SiteIsolationPolicy::RequestIdToMetaDataMap*

-SiteIsolationPolicy::GetRequestIdToMetaDataMap() {

- CR_DEFINE_STATIC_LOCAL(RequestIdToMetaDataMap, metadata_map_, ());

- return &metadata_map_;

-SiteIsolationPolicy::RequestIdToResultMap*

-SiteIsolationPolicy::GetRequestIdToResultMap() {

- CR_DEFINE_STATIC_LOCAL(RequestIdToResultMap, result_map_, ());

- return &result_map_;

-} // namespace content

« no previous file with comments | « content/child/site_isolation_policy.h ('k') | content/child/site_isolation_policy_browsertest.cc » ('j') | no next file with comments »