Chromium Code Reviews

Unified Diff: content/child/site_isolation_stats_gatherer.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3
Patch Set: Re-upload. Created 5 years, 6 months ago
Index: content/child/site_isolation_stats_gatherer.cc
diff --git a/content/child/site_isolation_policy.cc b/content/child/site_isolation_stats_gatherer.cc
similarity index 51%
copy from content/child/site_isolation_policy.cc
copy to content/child/site_isolation_stats_gatherer.cc
index 5fb003951b65be89011437a53489d28581b35ab5..c51640eb393ac9343c0b066e2ea36e7ae6b66613 100644
--- a/content/child/site_isolation_policy.cc
+++ b/content/child/site_isolation_stats_gatherer.cc
@@ -1,22 +1,15 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "content/child/site_isolation_policy.h"
+#include "content/child/site_isolation_stats_gatherer.h"
-#include "base/basictypes.h"
-#include "base/command_line.h"
-#include "base/lazy_instance.h"
-#include "base/logging.h"
#include "base/metrics/histogram.h"
+#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
-#include "content/public/common/content_switches.h"
#include "content/public/common/resource_response_info.h"
-#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/http/http_response_headers.h"
-using base::StringPiece;
-
namespace content {
namespace {
@@ -25,18 +18,6 @@ namespace {
// only activated in renderer processes.
static bool g_stats_gathering_enabled = false;
-// MIME types
-const char kTextHtml[] = "text/html";
-const char kTextXml[] = "text/xml";
-const char xAppRssXml[] = "application/rss+xml";
-const char kAppXml[] = "application/xml";
-const char kAppJson[] = "application/json";
-const char kTextJson[] = "text/json";
-const char kTextXjson[] = "text/x-json";
-const char kTextPlain[] = "text/plain";
-
-// TODO(dsjang): this is only needed for collecting UMA stats. It will be
-// deleted when this class is used for actual blocking.
bool IsRenderableStatusCode(int status_code) {
// Chrome only uses the content of a response with one of these status codes
// for CSS/JavaScript. For images, Chrome just ignores status code.
@@ -49,31 +30,6 @@ bool IsRenderableStatusCode(int status_code) {
return false;
}
-bool MatchesSignature(StringPiece data,
- const StringPiece signatures[],
- size_t arr_size) {
- size_t offset = data.find_first_not_of(" \t\r\n");
- // There is no not-whitespace character in this document.
- if (offset == base::StringPiece::npos)
- return false;
-
- data.remove_prefix(offset);
- size_t length = data.length();
-
- for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
- const StringPiece& signature = signatures[sig_index];
- size_t signature_length = signature.length();
- if (length < signature_length)
- continue;
-
- if (base::LowerCaseEqualsASCII(data.begin(),
- data.begin() + signature_length,
- signature.data()))
- return true;
- }
- return false;
-}
-
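[Editor's note] The MatchesSignature() helper being removed here skips leading whitespace and then does a case-insensitive ASCII prefix match against each candidate signature. A minimal standalone sketch of the same idea (not the Chromium implementation; MatchesAnySignature and its standard-library comparison are illustrative only):

    #include <cctype>
    #include <string>
    #include <vector>

    // Sketch: true if |data|, after leading whitespace, starts with any of
    // |signatures|, compared case-insensitively (ASCII only).
    bool MatchesAnySignature(const std::string& data,
                             const std::vector<std::string>& signatures) {
      size_t offset = data.find_first_not_of(" \t\r\n");
      if (offset == std::string::npos)
        return false;  // The document is all whitespace.
      for (const std::string& sig : signatures) {
        if (data.size() - offset < sig.size())
          continue;
        bool match = true;
        for (size_t i = 0; i < sig.size(); ++i) {
          if (std::tolower(static_cast<unsigned char>(data[offset + i])) !=
              std::tolower(static_cast<unsigned char>(sig[i]))) {
            match = false;
            break;
          }
        }
        if (match)
          return true;
      }
      return false;
    }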
void IncrementHistogramCount(const std::string& name) {
// The default value of min, max, bucket_count are copied from histogram.h.
base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
@@ -211,7 +167,7 @@ bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
DCHECK(resp_data.get());
- StringPiece data(raw_data, raw_length);
+ base::StringPiece data(raw_data, raw_length);
// Record the length of the first received chunk of data to see if it's enough
// for sniffing.
@@ -291,200 +247,7 @@ bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
return would_block;
}
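[Editor's note] The only change in this hunk is dropping the file-level "using base::StringPiece;" declaration, so the type is now spelled base::StringPiece at the call site. The pattern itself is a non-owning view over the first received chunk; an illustrative equivalent using std::string_view (not the Chromium type) would be:

    #include <string_view>

    // The view aliases |raw_data| without copying; it is only valid while the
    // underlying buffer stays alive, which is why sniffing runs synchronously
    // on the chunk.
    void InspectFirstChunk(const char* raw_data, size_t raw_length) {
      std::string_view data(raw_data, raw_length);
      // ... sniffers would run over |data| here ...
    }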
-CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(
- const std::string& mime_type) {
- if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) {
- return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;
- }
-
- if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) {
- return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;
- }
-
- if (base::LowerCaseEqualsASCII(mime_type, kAppJson) ||
- base::LowerCaseEqualsASCII(mime_type, kTextJson) ||
- base::LowerCaseEqualsASCII(mime_type, kTextXjson)) {
- return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON;
- }
-
- if (base::LowerCaseEqualsASCII(mime_type, kTextXml) ||
- base::LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
- base::LowerCaseEqualsASCII(mime_type, kAppXml)) {
- return CROSS_SITE_DOCUMENT_MIME_TYPE_XML;
- }
-
- return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS;
-}
-
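[Editor's note] The removed GetCanonicalMimeType() collapses concrete Content-Type strings into a small enum so the rest of the logic can switch on a canonical value. A rough standalone sketch of an equivalent mapping (hypothetical names, standard library only):

    #include <algorithm>
    #include <cctype>
    #include <string>

    enum class CanonicalMime { kHtml, kPlain, kJson, kXml, kOthers };

    // Sketch: lower-case the MIME type, then bucket it the same way the
    // removed code does (text/html, text/plain, JSON variants, XML variants).
    CanonicalMime GetCanonicalMime(std::string mime_type) {
      std::transform(mime_type.begin(), mime_type.end(), mime_type.begin(),
                     [](unsigned char c) {
                       return static_cast<char>(std::tolower(c));
                     });
      if (mime_type == "text/html")
        return CanonicalMime::kHtml;
      if (mime_type == "text/plain")
        return CanonicalMime::kPlain;
      if (mime_type == "application/json" || mime_type == "text/json" ||
          mime_type == "text/x-json")
        return CanonicalMime::kJson;
      if (mime_type == "text/xml" || mime_type == "application/rss+xml" ||
          mime_type == "application/xml")
        return CanonicalMime::kXml;
      return CanonicalMime::kOthers;
    }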
-bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {
- // We exclude ftp:// from here. FTP doesn't provide a Content-Type
- // header which our policy depends on, so we cannot protect any
- // document from FTP servers.
- return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
-}
-
-bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,
- const GURL& response_url) {
- if (!frame_origin.is_valid() || !response_url.is_valid())
- return false;
-
- if (frame_origin.scheme() != response_url.scheme())
- return false;
-
-  // SameDomainOrHost() extracts the effective domains (public suffix plus one)
-  // from the two URLs and compares them.
- return net::registry_controlled_domains::SameDomainOrHost(
- frame_origin, response_url,
- net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
-}
-
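[Editor's note] IsSameSite() compares schemes and then registrable domains (eTLD+1) rather than full origins, so sub1.a.com and sub2.a.com count as the same site. A rough illustration of that behaviour, assuming a hypothetical RegistrableDomain() helper in place of the real net::registry_controlled_domains API:

    #include <string>

    // Hypothetical helper: returns the registrable domain (eTLD+1) of a host,
    // e.g. "mail.corp.a.com" -> "a.com". The removed code delegates this to
    // net::registry_controlled_domains::SameDomainOrHost().
    std::string RegistrableDomain(const std::string& host);

    bool IsSameSiteSketch(const std::string& frame_scheme,
                          const std::string& frame_host,
                          const std::string& response_scheme,
                          const std::string& response_host) {
      if (frame_scheme != response_scheme)
        return false;  // e.g. https://a.com vs. http://a.com differ.
      return RegistrableDomain(frame_host) == RegistrableDomain(response_host);
    }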
-// We don't use WebKit's existing CORS policy implementation since its
-// policy works in terms of origins, not sites. For example, a frame at
-// sub.a.com is not allowed to access a document from sub1.a.com under
-// CORS, but under Site Isolation it is allowed.
-bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
- const GURL& frame_origin,
- const GURL& website_origin,
- const std::string& access_control_origin) {
- // Many websites are sending back "\"*\"" instead of "*". This is
- // non-standard practice, and not supported by Chrome. Refer to
- // CrossOriginAccessControl::passesAccessControlCheck().
-
- // TODO(dsjang): * is not allowed for the response from a request
- // with cookies. This allows for more than what the renderer will
- // eventually be able to receive, so we won't see illegal cross-site
- // documents allowed by this. We have to find a way to see if this
- // response is from a cookie-tagged request or not in the future.
- if (access_control_origin == "*")
- return true;
-
-  // TODO(dsjang): The CORS spec only allows a fully specified URL (or "*"),
-  // but many websites send just a domain for access_control_origin, which
-  // WebKit's CORS logic rejects in
-  // CrossOriginAccessControl::passesAccessControlCheck(). GURL reports
-  // is_valid() as false when it is created from a URL containing * in the
-  // domain part.
-
- GURL cors_origin(access_control_origin);
- return IsSameSite(frame_origin, cors_origin);
-}
-
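[Editor's note] IsValidCorsHeaderSet() accepts a bare "*" and otherwise only requires the Access-Control-Allow-Origin value to be same-site with the frame, which is deliberately looser than the per-origin check CORS specifies. A sketch under the same assumptions as above (Origin, ParseOrigin, and RegistrableDomain are hypothetical stand-ins for GURL and the registry-controlled-domain code):

    #include <string>

    struct Origin { std::string scheme; std::string host; bool valid; };
    Origin ParseOrigin(const std::string& url);          // hypothetical
    std::string RegistrableDomain(const std::string&);   // hypothetical

    // Sketch: '*' always passes; any other value must parse and be same-site
    // with the frame.
    bool IsValidCorsHeaderSketch(const Origin& frame,
                                 const std::string& allow_origin) {
      if (allow_origin == "*")
        return true;
      Origin cors = ParseOrigin(allow_origin);
      if (!cors.valid || cors.scheme != frame.scheme)
        return false;
      return RegistrableDomain(cors.host) == RegistrableDomain(frame.host);
    }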
-// This function is a slight modification of |net::SniffForHTML|.
-bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {
-  // The content sniffers used by Chrome and Firefox treat "<!--" as one
-  // of the HTML signatures, but it also appears in valid JavaScript that
-  // the browser considers well-formed. Since we do not want to block any
-  // JS, we exclude it from our HTML signatures. This weakens our document
-  // blocking policy, but it breaks fewer websites.
- // TODO(dsjang): parameterize |net::SniffForHTML| with an option
- // that decides whether to include <!-- or not, so that we can
- // remove this function.
- // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
- // process, we should do single-thread checking here for the static
- // initializer.
- static const StringPiece kHtmlSignatures[] = {
- StringPiece("<!DOCTYPE html"), // HTML5 spec
- StringPiece("<script"), // HTML5 spec, Mozilla
- StringPiece("<html"), // HTML5 spec, Mozilla
- StringPiece("<head"), // HTML5 spec, Mozilla
- StringPiece("<iframe"), // Mozilla
- StringPiece("<h1"), // Mozilla
- StringPiece("<div"), // Mozilla
- StringPiece("<font"), // Mozilla
- StringPiece("<table"), // Mozilla
- StringPiece("<a"), // Mozilla
- StringPiece("<style"), // Mozilla
- StringPiece("<title"), // Mozilla
- StringPiece("<b"), // Mozilla
- StringPiece("<body"), // Mozilla
- StringPiece("<br"), // Mozilla
- StringPiece("<p"), // Mozilla
- StringPiece("<?xml") // Mozilla
- };
-
- while (data.length() > 0) {
- if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures)))
- return true;
-
- // If we cannot find "<!--", we fail sniffing this as HTML.
- static const StringPiece kCommentBegins[] = {StringPiece("<!--")};
- if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))
- break;
-
- // Search for --> and do SniffForHTML after that. If we can find the
- // comment's end, we start HTML sniffing from there again.
- static const char kEndComment[] = "-->";
- size_t offset = data.find(kEndComment);
- if (offset == base::StringPiece::npos)
- break;
-
- // Proceed to the index next to the ending comment (-->).
- data.remove_prefix(offset + strlen(kEndComment));
- }
-
- return false;
-}
-
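[Editor's note] Unlike net::SniffForHTML(), the removed SniffForHTML() skips over complete <!-- ... --> comments instead of treating <!-- as an HTML signature, so commented-out markup in otherwise valid JavaScript is not misclassified. The comment-skipping loop, sketched over std::string_view with hypothetical prefix helpers:

    #include <string_view>

    // Hypothetical helpers standing in for the signature table above: each
    // skips leading whitespace and does a case-insensitive prefix match.
    bool StartsWithHtmlTag(std::string_view data);      // "<html", "<script", ...
    bool StartsWithCommentOpen(std::string_view data);  // "<!--"

    bool SniffForHtmlSketch(std::string_view data) {
      while (!data.empty()) {
        if (StartsWithHtmlTag(data))
          return true;
        // Not a recognized tag: only continue if the data begins with an HTML
        // comment, and resume sniffing just past the matching "-->".
        if (!StartsWithCommentOpen(data))
          break;
        size_t close = data.find("-->");
        if (close == std::string_view::npos)
          break;
        data.remove_prefix(close + 3);  // 3 == strlen("-->")
      }
      return false;
    }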
-bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {
-  // TODO(dsjang): Chrome's mime_sniffer uses strncasecmp() for this
-  // signature. However, XML is case-sensitive; shouldn't we match
-  // case-sensitively and only treat documents starting with the exact
-  // string <?xml (rather than <?XML) as XML?
- // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
- // process, we should do single-thread checking here for the static
- // initializer.
- static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
- return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
-}
-
-bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {
-  // TODO(dsjang): We have to come up with a better way to sniff JSON.
-  // However, even a regular expression cannot help much here since we do
-  // not do full parsing. This DFA starts in kStartState and looks for {,
-  // then " or ', then : in that order. We avoid adding a dependency on a
-  // regular expression library.
- enum {
- kStartState,
- kLeftBraceState,
- kLeftQuoteState,
- kColonState,
- kTerminalState,
- } state = kStartState;
-
- size_t length = data.length();
- for (size_t i = 0; i < length && state < kColonState; ++i) {
- const char c = data[i];
- if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
- continue;
-
- switch (state) {
- case kStartState:
- if (c == '{')
- state = kLeftBraceState;
- else
- state = kTerminalState;
- break;
- case kLeftBraceState:
- if (c == '\"' || c == '\'')
- state = kLeftQuoteState;
- else
- state = kTerminalState;
- break;
- case kLeftQuoteState:
- if (c == ':')
- state = kColonState;
- break;
- case kColonState:
- case kTerminalState:
- NOTREACHED();
- break;
- }
- }
- return state == kColonState;
-}
-
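[Editor's note] SniffForJSON() is a small hand-rolled state machine: ignoring whitespace, it requires a '{', then a single or double quote, then a ':' somewhere afterwards, which is enough to catch the common {"key": ...} shape without a JSON parser. The same automaton as a standalone sketch:

    #include <string_view>

    // Sketch of the removed DFA: accept input that, ignoring whitespace,
    // starts with '{', then a quote, then eventually a ':' (e.g. {"key": 1}).
    bool SniffForJsonSketch(std::string_view data) {
      enum { kStart, kLeftBrace, kLeftQuote, kColon, kTerminal } state = kStart;
      for (char c : data) {
        if (state == kColon || state == kTerminal)
          break;
        if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
          continue;
        switch (state) {
          case kStart:
            state = (c == '{') ? kLeftBrace : kTerminal;
            break;
          case kLeftBrace:
            state = (c == '"' || c == '\'') ? kLeftQuote : kTerminal;
            break;
          case kLeftQuote:
            if (c == ':')
              state = kColon;
            break;
          default:
            break;
        }
      }
      return state == kColon;
    }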
-bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {
+bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) {
// The purpose of this function is to try to see if there's any possibility
// that this data can be JavaScript (superset of JS). Search for "var " for JS
// detection. This is a real hack and should only be used for stats gathering.