| Index: content/child/site_isolation_policy.cc
|
| diff --git a/content/child/site_isolation_policy.cc b/content/child/site_isolation_policy.cc
|
| deleted file mode 100644
|
| index 0f9fc61ff26f93dd0be8b1c4f1587218d6d0273c..0000000000000000000000000000000000000000
|
| --- a/content/child/site_isolation_policy.cc
|
| +++ /dev/null
|
| @@ -1,493 +0,0 @@
|
| -// Copyright 2013 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "content/child/site_isolation_policy.h"
|
| -
|
| -#include "base/basictypes.h"
|
| -#include "base/command_line.h"
|
| -#include "base/lazy_instance.h"
|
| -#include "base/logging.h"
|
| -#include "base/metrics/histogram.h"
|
| -#include "base/strings/string_util.h"
|
| -#include "content/public/common/content_switches.h"
|
| -#include "content/public/common/resource_response_info.h"
|
| -#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
|
| -#include "net/http/http_response_headers.h"
|
| -
|
| -using base::StringPiece;
|
| -
|
| -namespace content {
|
| -
|
| -namespace {
|
| -
|
| -// The gathering of UMA stats for site isolation is deactivated by default, and
|
| -// only activated in renderer processes.
|
| -static bool g_stats_gathering_enabled = false;
|
| -
|
| -// MIME types
|
| -const char kTextHtml[] = "text/html";
|
| -const char kTextXml[] = "text/xml";
|
| -const char xAppRssXml[] = "application/rss+xml";
|
| -const char kAppXml[] = "application/xml";
|
| -const char kAppJson[] = "application/json";
|
| -const char kTextJson[] = "text/json";
|
| -const char kTextXjson[] = "text/x-json";
|
| -const char kTextPlain[] = "text/plain";
|
| -
|
| -// TODO(dsjang): this is only needed for collecting UMA stats. It will be
|
| -// deleted when this class is used for actual blocking.
|
| -bool IsRenderableStatusCode(int status_code) {
|
| - // Chrome only uses the content of a response with one of these status codes
|
| -  // for CSS/JavaScript. For images, Chrome just ignores the status code.
|
| - const int renderable_status_code[] = {
|
| - 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};
|
| - for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
|
| - if (renderable_status_code[i] == status_code)
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool MatchesSignature(StringPiece data,
|
| - const StringPiece signatures[],
|
| - size_t arr_size) {
|
| - size_t offset = data.find_first_not_of(" \t\r\n");
|
| -  // There is no non-whitespace character in this document.
|
| - if (offset == base::StringPiece::npos)
|
| - return false;
|
| -
|
| - data.remove_prefix(offset);
|
| - size_t length = data.length();
|
| -
|
| - for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
|
| - const StringPiece& signature = signatures[sig_index];
|
| - size_t signature_length = signature.length();
|
| - if (length < signature_length)
|
| - continue;
|
| -
|
| - if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length,
|
| - signature.data()))
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -void IncrementHistogramCount(const std::string& name) {
|
| - // The default value of min, max, bucket_count are copied from histogram.h.
|
| - base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
|
| - name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
|
| - histogram_pointer->Add(1);
|
| -}
|
| -
|
| -void IncrementHistogramEnum(const std::string& name,
|
| - uint32 sample,
|
| - uint32 boundary_value) {
|
| - // The default value of min, max, bucket_count are copied from histogram.h.
|
| - base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
|
| - name, 1, boundary_value, boundary_value + 1,
|
| - base::HistogramBase::kUmaTargetedHistogramFlag);
|
| - histogram_pointer->Add(sample);
|
| -}
|
| -
|
| -void HistogramCountBlockedResponse(
|
| - const std::string& bucket_prefix,
|
| - const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
|
| - bool nosniff_block) {
|
| - std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
|
| - IncrementHistogramCount(bucket_prefix + block_label);
|
| -
|
| -  // The content is blocked if it is sniffed as HTML/JSON/XML. When the
|
| -  // blocked response has an error status code, the blocking is not
|
| -  // disruptive, for the following reasons: 1) the blocked content is not
|
| -  // a binary object (such as an image), since it was sniffed as text;
|
| -  // 2) the blocking breaks renderer behavior only for JavaScript or CSS,
|
| -  // and the renderer does not use the contents of JS/CSS responses that
|
| -  // have an error status code (e.g., 404); 3) the renderer is expected
|
| -  // not to use cross-site document content for purposes other than
|
| -  // JS/CSS (e.g., XHR).
|
| - bool renderable_status_code =
|
| - IsRenderableStatusCode(resp_data->http_status_code);
|
| -
|
| - if (renderable_status_code) {
|
| - IncrementHistogramEnum(
|
| - bucket_prefix + block_label + ".RenderableStatusCode",
|
| - resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);
|
| - } else {
|
| - IncrementHistogramCount(bucket_prefix + block_label +
|
| - ".NonRenderableStatusCode");
|
| - }
|
| -}
|
| -
|
| -void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
|
| - bool sniffed_as_js) {
|
| - IncrementHistogramCount(bucket_prefix + ".NotBlocked");
|
| - if (sniffed_as_js)
|
| - IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
|
| -}
|
| -
|
| -void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {
|
| - g_stats_gathering_enabled = enabled;
|
| -}
|
| -
|
| -linked_ptr<SiteIsolationResponseMetaData>
|
| -SiteIsolationStatsGatherer::OnReceivedResponse(
|
| - const GURL& frame_origin,
|
| - const GURL& response_url,
|
| - ResourceType resource_type,
|
| - int origin_pid,
|
| - const ResourceResponseInfo& info) {
|
| - if (!g_stats_gathering_enabled)
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| -  // If |origin_pid| is non-zero, it means that this response is for a plugin
|
| - // spawned from this renderer process. We exclude responses for plugins for
|
| - // now, but eventually, we're going to make plugin processes directly talk to
|
| - // the browser process so that we don't apply cross-site document blocking to
|
| - // them.
|
| - if (origin_pid)
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| - UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
|
| -
|
| - // See if this is for navigation. If it is, don't block it, under the
|
| - // assumption that we will put it in an appropriate process.
|
| - if (IsResourceTypeFrame(resource_type))
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| - if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| - if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| - CrossSiteDocumentMimeType canonical_mime_type =
|
| - CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);
|
| -
|
| - if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| -  // Every CORS response should have the Access-Control-Allow-Origin header,
|
| -  // even if it is preceded by a pre-flight request. Therefore, if this is a
|
| -  // CORS response, it has this header. response.httpHeaderField() internally
|
| -  // uses case-insensitive matching for the header name.
|
| - std::string access_control_origin;
|
| -
|
| - // We can use a case-insensitive header name for EnumerateHeader().
|
| - info.headers->EnumerateHeader(NULL, "access-control-allow-origin",
|
| - &access_control_origin);
|
| - if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
|
| - frame_origin, response_url, access_control_origin))
|
| - return linked_ptr<SiteIsolationResponseMetaData>();
|
| -
|
| - // Real XSD data collection starts from here.
|
| - std::string no_sniff;
|
| - info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
|
| -
|
| - linked_ptr<SiteIsolationResponseMetaData> resp_data(
|
| - new SiteIsolationResponseMetaData);
|
| - resp_data->frame_origin = frame_origin.spec();
|
| - resp_data->response_url = response_url;
|
| - resp_data->resource_type = resource_type;
|
| - resp_data->canonical_mime_type = canonical_mime_type;
|
| - resp_data->http_status_code = info.headers->response_code();
|
| - resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");
|
| -
|
| - return resp_data;
|
| -}
|
| -
|
| -bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
|
| - const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
|
| - const char* raw_data,
|
| - int raw_length) {
|
| - if (!g_stats_gathering_enabled)
|
| - return false;
|
| -
|
| - DCHECK(resp_data.get());
|
| -
|
| - StringPiece data(raw_data, raw_length);
|
| -
|
| -  // Record the length of the first received chunk of data to see if it's
|
| -  // enough for sniffing.
|
| - UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
|
| -
|
| - // Record the number of cross-site document responses with a specific mime
|
| - // type (text/html, text/xml, etc).
|
| - UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
|
| - resp_data->canonical_mime_type,
|
| - CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);
|
| -
|
| - // Store the result of cross-site document blocking analysis.
|
| - bool would_block = false;
|
| - bool sniffed_as_js = SniffForJS(data);
|
| -
|
| -  // Record the number of responses whose content is sniffed to see whether
|
| -  // it matches what its mime type claims it to be. For example, we apply an
|
| -  // HTML sniffer to a document tagged with text/html here. Whenever this
|
| -  // check succeeds, we'll block the response.
|
| - if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {
|
| - std::string bucket_prefix;
|
| - bool sniffed_as_target_document = false;
|
| - if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {
|
| - bucket_prefix = "SiteIsolation.XSD.HTML";
|
| - sniffed_as_target_document =
|
| - CrossSiteDocumentClassifier::SniffForHTML(data);
|
| - } else if (resp_data->canonical_mime_type ==
|
| - CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {
|
| - bucket_prefix = "SiteIsolation.XSD.XML";
|
| - sniffed_as_target_document =
|
| - CrossSiteDocumentClassifier::SniffForXML(data);
|
| - } else if (resp_data->canonical_mime_type ==
|
| - CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {
|
| - bucket_prefix = "SiteIsolation.XSD.JSON";
|
| - sniffed_as_target_document =
|
| - CrossSiteDocumentClassifier::SniffForJSON(data);
|
| - } else {
|
| - NOTREACHED() << "Not a blockable mime type: "
|
| - << resp_data->canonical_mime_type;
|
| - }
|
| -
|
| - if (sniffed_as_target_document) {
|
| - would_block = true;
|
| - HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
|
| - } else {
|
| - if (resp_data->no_sniff) {
|
| - would_block = true;
|
| - HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
|
| - } else {
|
| - HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
|
| - }
|
| - }
|
| - } else {
|
| -    // This block is for plain text documents. We apply our HTML, XML,
|
| -    // and JSON sniffers to a text document in that order, and block it
|
| -    // if any of them succeeds in sniffing.
|
| - std::string bucket_prefix;
|
| - if (CrossSiteDocumentClassifier::SniffForHTML(data))
|
| - bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
|
| - else if (CrossSiteDocumentClassifier::SniffForXML(data))
|
| - bucket_prefix = "SiteIsolation.XSD.Plain.XML";
|
| - else if (CrossSiteDocumentClassifier::SniffForJSON(data))
|
| - bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
|
| -
|
| - if (bucket_prefix.size() > 0) {
|
| - would_block = true;
|
| - HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
|
| - } else if (resp_data->no_sniff) {
|
| - would_block = true;
|
| - HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
|
| - } else {
|
| - HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
|
| - sniffed_as_js);
|
| - }
|
| - }
|
| -
|
| - return would_block;
|
| -}
|
| -
|
| -CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(
|
| - const std::string& mime_type) {
|
| - if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
|
| - return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;
|
| - }
|
| -
|
| - if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
|
| - return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;
|
| - }
|
| -
|
| - if (LowerCaseEqualsASCII(mime_type, kAppJson) ||
|
| - LowerCaseEqualsASCII(mime_type, kTextJson) ||
|
| - LowerCaseEqualsASCII(mime_type, kTextXjson)) {
|
| - return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON;
|
| - }
|
| -
|
| - if (LowerCaseEqualsASCII(mime_type, kTextXml) ||
|
| - LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
|
| - LowerCaseEqualsASCII(mime_type, kAppXml)) {
|
| - return CROSS_SITE_DOCUMENT_MIME_TYPE_XML;
|
| - }
|
| -
|
| - return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS;
|
| -}
|
| -
|
| -bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {
|
| - // We exclude ftp:// from here. FTP doesn't provide a Content-Type
|
| - // header which our policy depends on, so we cannot protect any
|
| - // document from FTP servers.
|
| - return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
|
| -}
|
| -
|
| -bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,
|
| - const GURL& response_url) {
|
| - if (!frame_origin.is_valid() || !response_url.is_valid())
|
| - return false;
|
| -
|
| - if (frame_origin.scheme() != response_url.scheme())
|
| - return false;
|
| -
|
| - // SameDomainOrHost() extracts the effective domains (public suffix plus one)
|
| -  // from the two URLs and compares them.
|
| - return net::registry_controlled_domains::SameDomainOrHost(
|
| - frame_origin, response_url,
|
| - net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
|
| -}
|
| -
|
| -// We don't use WebKit's existing CORS policy implementation since its
|
| -// policy works in terms of origins, not sites. For example, a frame at
|
| -// sub.a.com is not allowed by CORS to access a document from sub1.a.com,
|
| -// but under Site Isolation it is allowed.
|
| -bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
|
| - const GURL& frame_origin,
|
| - const GURL& website_origin,
|
| - const std::string& access_control_origin) {
|
| - // Many websites are sending back "\"*\"" instead of "*". This is
|
| - // non-standard practice, and not supported by Chrome. Refer to
|
| - // CrossOriginAccessControl::passesAccessControlCheck().
|
| -
|
| - // TODO(dsjang): * is not allowed for the response from a request
|
| - // with cookies. This allows for more than what the renderer will
|
| - // eventually be able to receive, so we won't see illegal cross-site
|
| - // documents allowed by this. We have to find a way to see if this
|
| - // response is from a cookie-tagged request or not in the future.
|
| - if (access_control_origin == "*")
|
| - return true;
|
| -
|
| -  // TODO(dsjang): The CORS spec only allows a fully specified URL (or "*"),
|
| -  // but many websites use just a domain for access_control_origin, and this
|
| -  // is blocked by WebKit's CORS logic in
|
| -  // CrossOriginAccessControl::passesAccessControlCheck(). GURL sets
|
| -  // is_valid() to false when it is created from a URL containing * in the
|
| -  // domain part.
|
| -
|
| - GURL cors_origin(access_control_origin);
|
| - return IsSameSite(frame_origin, cors_origin);
|
| -}
|
| -
|
| -// This function is a slight modification of |net::SniffForHTML|.
|
| -bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {
|
| -  // The content sniffers used by Chrome and Firefox use "<!--" as one
|
| -  // of the HTML signatures, but it also appears in valid JavaScript
|
| -  // that the browser considers well-formed. Since we do not want to
|
| -  // block any JS, we exclude it from our HTML signatures. This can
|
| -  // weaken our document blocking policy, but it breaks fewer
|
| -  // websites.
|
| - // TODO(dsjang): parameterize |net::SniffForHTML| with an option
|
| - // that decides whether to include <!-- or not, so that we can
|
| - // remove this function.
|
| - // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
|
| - // process, we should do single-thread checking here for the static
|
| - // initializer.
|
| - static const StringPiece kHtmlSignatures[] = {
|
| - StringPiece("<!DOCTYPE html"), // HTML5 spec
|
| - StringPiece("<script"), // HTML5 spec, Mozilla
|
| - StringPiece("<html"), // HTML5 spec, Mozilla
|
| - StringPiece("<head"), // HTML5 spec, Mozilla
|
| - StringPiece("<iframe"), // Mozilla
|
| - StringPiece("<h1"), // Mozilla
|
| - StringPiece("<div"), // Mozilla
|
| - StringPiece("<font"), // Mozilla
|
| - StringPiece("<table"), // Mozilla
|
| - StringPiece("<a"), // Mozilla
|
| - StringPiece("<style"), // Mozilla
|
| - StringPiece("<title"), // Mozilla
|
| - StringPiece("<b"), // Mozilla
|
| - StringPiece("<body"), // Mozilla
|
| - StringPiece("<br"), // Mozilla
|
| - StringPiece("<p"), // Mozilla
|
| - StringPiece("<?xml") // Mozilla
|
| - };
|
| -
|
| - while (data.length() > 0) {
|
| - if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures)))
|
| - return true;
|
| -
|
| - // If we cannot find "<!--", we fail sniffing this as HTML.
|
| - static const StringPiece kCommentBegins[] = {StringPiece("<!--")};
|
| - if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))
|
| - break;
|
| -
|
| - // Search for --> and do SniffForHTML after that. If we can find the
|
| - // comment's end, we start HTML sniffing from there again.
|
| - static const char kEndComment[] = "-->";
|
| - size_t offset = data.find(kEndComment);
|
| - if (offset == base::StringPiece::npos)
|
| - break;
|
| -
|
| -    // Proceed to the position just past the end of the comment (-->).
|
| - data.remove_prefix(offset + strlen(kEndComment));
|
| - }
|
| -
|
| - return false;
|
| -}
|
| -
|
| -bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {
|
| -  // TODO(dsjang): Chrome's mime_sniffer uses strncasecmp() for this
|
| -  // signature. However, XML is case-sensitive. Shouldn't we be more
|
| -  // lenient and block only documents starting with the exact string
|
| -  // "<?xml" rather than "<?XML"?
|
| - // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
|
| - // process, we should do single-thread checking here for the static
|
| - // initializer.
|
| - static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
|
| - return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
|
| -}
|
| -
|
| -bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {
|
| -  // TODO(dsjang): We have to come up with a better way to sniff JSON.
|
| -  // However, even a regular expression cannot help much, since we don't
|
| -  // do full parsing. This DFA starts in the start state and looks for
|
| -  // '{', then a double or single quote, then ':', in that order. We
|
| -  // avoid adding a dependency on a regular expression library.
|
| - enum {
|
| - kStartState,
|
| - kLeftBraceState,
|
| - kLeftQuoteState,
|
| - kColonState,
|
| - kTerminalState,
|
| - } state = kStartState;
|
| -
|
| - size_t length = data.length();
|
| - for (size_t i = 0; i < length && state < kColonState; ++i) {
|
| - const char c = data[i];
|
| - if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
|
| - continue;
|
| -
|
| - switch (state) {
|
| - case kStartState:
|
| - if (c == '{')
|
| - state = kLeftBraceState;
|
| - else
|
| - state = kTerminalState;
|
| - break;
|
| - case kLeftBraceState:
|
| - if (c == '\"' || c == '\'')
|
| - state = kLeftQuoteState;
|
| - else
|
| - state = kTerminalState;
|
| - break;
|
| - case kLeftQuoteState:
|
| - if (c == ':')
|
| - state = kColonState;
|
| - break;
|
| - case kColonState:
|
| - case kTerminalState:
|
| - NOTREACHED();
|
| - break;
|
| - }
|
| - }
|
| - return state == kColonState;
|
| -}
|
| -
|
| -bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {
|
| -  // The purpose of this function is to see if there is any possibility that
|
| -  // this data can be JavaScript (or a superset of JS). We search for "var "
|
| -  // for JS detection. This is a real hack, for stats gathering only.
|
| - return data.find("var ") != base::StringPiece::npos;
|
| -}
|
| -
|
| -} // namespace content
|
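| Editorial note: the MatchesSignature() helper deleted above skips leading
| whitespace and then compares the data case-insensitively against a list of
| signature prefixes. A rough, self-contained sketch of that idea follows,
| using only the standard library; the name StartsWithAnySignature is
| illustrative and is not part of the original CL:
|
| #include <cctype>
| #include <string>
| #include <vector>
|
| // Returns true if |data|, after any leading whitespace, begins with one of
| // |signatures|, compared case-insensitively (ASCII only).
| bool StartsWithAnySignature(const std::string& data,
|                             const std::vector<std::string>& signatures) {
|   size_t offset = data.find_first_not_of(" \t\r\n");
|   if (offset == std::string::npos)
|     return false;  // Nothing but whitespace.
|   for (const std::string& sig : signatures) {
|     if (data.size() - offset < sig.size())
|       continue;  // Not enough data left to match this signature.
|     bool match = true;
|     for (size_t i = 0; i < sig.size(); ++i) {
|       if (std::tolower(static_cast<unsigned char>(data[offset + i])) !=
|           std::tolower(static_cast<unsigned char>(sig[i]))) {
|         match = false;
|         break;
|       }
|     }
|     if (match)
|       return true;
|   }
|   return false;
| }
|
| With signatures such as {"<html", "<script"}, an input like "  <HTML>" would
| match while "var x = 1;" would not, which mirrors how SniffForHTML() and
| SniffForXML() above drive their checks.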
|
|
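| Similarly, SniffForJSON() above is a small hand-written state machine rather
| than a parser: skipping whitespace, it looks for '{', then an opening quote,
| then ':'. A minimal standalone sketch of the same heuristic (illustrative
| only; LooksLikeJsonObject is a hypothetical name, not Chromium code):
|
| #include <string>
|
| // Returns true if |data| looks like the start of a JSON object: '{', then a
| // quoted key, then ':', ignoring whitespace in between. Any other character
| // sends the machine to a terminal (rejecting) state.
| bool LooksLikeJsonObject(const std::string& data) {
|   enum State { kStart, kLeftBrace, kLeftQuote, kColon, kTerminal };
|   State state = kStart;
|   for (size_t i = 0;
|        i < data.size() && state != kColon && state != kTerminal; ++i) {
|     const char c = data[i];
|     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
|       continue;
|     switch (state) {
|       case kStart:
|         state = (c == '{') ? kLeftBrace : kTerminal;
|         break;
|       case kLeftBrace:
|         state = (c == '"' || c == '\'') ? kLeftQuote : kTerminal;
|         break;
|       case kLeftQuote:
|         if (c == ':')
|           state = kColon;  // Anything else is treated as part of the key.
|         break;
|       case kColon:
|       case kTerminal:
|         break;  // The loop condition stops before these states are revisited.
|     }
|   }
|   return state == kColon;
| }
|
| For example, {"a": 1} would be classified as JSON, while [1, 2] or {} would
| not; like the deleted code, this is a heuristic intended only for stats
| gathering, not a JSON validator.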