Index: content/child/site_isolation_stats_gatherer.cc |
diff --git a/content/child/site_isolation_policy.cc b/content/child/site_isolation_stats_gatherer.cc |
similarity index 52% |
copy from content/child/site_isolation_policy.cc |
copy to content/child/site_isolation_stats_gatherer.cc |
index 0f9fc61ff26f93dd0be8b1c4f1587218d6d0273c..c91847984db7f3e4a629f93de197f558416f63af 100644 |
--- a/content/child/site_isolation_policy.cc |
+++ b/content/child/site_isolation_stats_gatherer.cc |
@@ -1,22 +1,15 @@ |
-// Copyright 2013 The Chromium Authors. All rights reserved. |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
-#include "content/child/site_isolation_policy.h" |
+#include "content/child/site_isolation_stats_gatherer.h" |
-#include "base/basictypes.h" |
-#include "base/command_line.h" |
-#include "base/lazy_instance.h" |
-#include "base/logging.h" |
#include "base/metrics/histogram.h" |
+#include "base/strings/string_piece.h" |
#include "base/strings/string_util.h" |
-#include "content/public/common/content_switches.h" |
#include "content/public/common/resource_response_info.h" |
-#include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
#include "net/http/http_response_headers.h" |
-using base::StringPiece; |
- |
namespace content { |
namespace { |
@@ -25,18 +18,6 @@ namespace { |
// only activated in renderer processes. |
static bool g_stats_gathering_enabled = false; |
-// MIME types |
-const char kTextHtml[] = "text/html"; |
-const char kTextXml[] = "text/xml"; |
-const char xAppRssXml[] = "application/rss+xml"; |
-const char kAppXml[] = "application/xml"; |
-const char kAppJson[] = "application/json"; |
-const char kTextJson[] = "text/json"; |
-const char kTextXjson[] = "text/x-json"; |
-const char kTextPlain[] = "text/plain"; |
- |
-// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted |
-// when this class is used for actual blocking. |
bool IsRenderableStatusCode(int status_code) { |
// Chrome only uses the content of a response with one of these status codes |
// for CSS/JavaScript. For images, Chrome just ignores status code. |
@@ -49,30 +30,6 @@ bool IsRenderableStatusCode(int status_code) { |
return false; |
} |
-bool MatchesSignature(StringPiece data, |
- const StringPiece signatures[], |
- size_t arr_size) { |
- size_t offset = data.find_first_not_of(" \t\r\n"); |
- // There is no not-whitespace character in this document. |
- if (offset == base::StringPiece::npos) |
- return false; |
- |
- data.remove_prefix(offset); |
- size_t length = data.length(); |
- |
- for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { |
- const StringPiece& signature = signatures[sig_index]; |
- size_t signature_length = signature.length(); |
- if (length < signature_length) |
- continue; |
- |
- if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length, |
- signature.data())) |
- return true; |
- } |
- return false; |
-} |
- |
void IncrementHistogramCount(const std::string& name) { |
// The default value of min, max, bucket_count are copied from histogram.h. |
base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( |
@@ -210,7 +167,7 @@ bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( |
DCHECK(resp_data.get()); |
- StringPiece data(raw_data, raw_length); |
+ base::StringPiece data(raw_data, raw_length); |
// Record the length of the first received chunk of data to see if it's enough |
// for sniffing. |
@@ -290,200 +247,7 @@ bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( |
return would_block; |
} |
-CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( |
- const std::string& mime_type) { |
- if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { |
- return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; |
- } |
- |
- if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { |
- return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; |
- } |
- |
- if (LowerCaseEqualsASCII(mime_type, kAppJson) || |
- LowerCaseEqualsASCII(mime_type, kTextJson) || |
- LowerCaseEqualsASCII(mime_type, kTextXjson)) { |
- return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON; |
- } |
- |
- if (LowerCaseEqualsASCII(mime_type, kTextXml) || |
- LowerCaseEqualsASCII(mime_type, xAppRssXml) || |
- LowerCaseEqualsASCII(mime_type, kAppXml)) { |
- return CROSS_SITE_DOCUMENT_MIME_TYPE_XML; |
- } |
- |
- return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS; |
-} |
- |
-bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) { |
- // We exclude ftp:// from here. FTP doesn't provide a Content-Type |
- // header which our policy depends on, so we cannot protect any |
- // document from FTP servers. |
- return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); |
-} |
- |
-bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin, |
- const GURL& response_url) { |
- if (!frame_origin.is_valid() || !response_url.is_valid()) |
- return false; |
- |
- if (frame_origin.scheme() != response_url.scheme()) |
- return false; |
- |
- // SameDomainOrHost() extracts the effective domains (public suffix plus one) |
- // from the two URLs and compare them. |
- return net::registry_controlled_domains::SameDomainOrHost( |
- frame_origin, response_url, |
- net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
-} |
- |
-// We don't use Webkit's existing CORS policy implementation since |
-// their policy works in terms of origins, not sites. For example, |
-// when frame is sub.a.com and it is not allowed to access a document |
-// with sub1.a.com. But under Site Isolation, it's allowed. |
-bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet( |
- const GURL& frame_origin, |
- const GURL& website_origin, |
- const std::string& access_control_origin) { |
- // Many websites are sending back "\"*\"" instead of "*". This is |
- // non-standard practice, and not supported by Chrome. Refer to |
- // CrossOriginAccessControl::passesAccessControlCheck(). |
- |
- // TODO(dsjang): * is not allowed for the response from a request |
- // with cookies. This allows for more than what the renderer will |
- // eventually be able to receive, so we won't see illegal cross-site |
- // documents allowed by this. We have to find a way to see if this |
- // response is from a cookie-tagged request or not in the future. |
- if (access_control_origin == "*") |
- return true; |
- |
- // TODO(dsjang): The CORS spec only treats a fully specified URL, except for |
- // "*", but many websites are using just a domain for access_control_origin, |
- // and this is blocked by Webkit's CORS logic here : |
- // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set |
- // is_valid() to false when it is created from a URL containing * in the |
- // domain part. |
- |
- GURL cors_origin(access_control_origin); |
- return IsSameSite(frame_origin, cors_origin); |
-} |
- |
-// This function is a slight modification of |net::SniffForHTML|. |
-bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) { |
- // The content sniffer used by Chrome and Firefox are using "<!--" |
- // as one of the HTML signatures, but it also appears in valid |
- // JavaScript, considered as well-formed JS by the browser. Since |
- // we do not want to block any JS, we exclude it from our HTML |
- // signatures. This can weaken our document block policy, but we can |
- // break less websites. |
- // TODO(dsjang): parameterize |net::SniffForHTML| with an option |
- // that decides whether to include <!-- or not, so that we can |
- // remove this function. |
- // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser |
- // process, we should do single-thread checking here for the static |
- // initializer. |
- static const StringPiece kHtmlSignatures[] = { |
- StringPiece("<!DOCTYPE html"), // HTML5 spec |
- StringPiece("<script"), // HTML5 spec, Mozilla |
- StringPiece("<html"), // HTML5 spec, Mozilla |
- StringPiece("<head"), // HTML5 spec, Mozilla |
- StringPiece("<iframe"), // Mozilla |
- StringPiece("<h1"), // Mozilla |
- StringPiece("<div"), // Mozilla |
- StringPiece("<font"), // Mozilla |
- StringPiece("<table"), // Mozilla |
- StringPiece("<a"), // Mozilla |
- StringPiece("<style"), // Mozilla |
- StringPiece("<title"), // Mozilla |
- StringPiece("<b"), // Mozilla |
- StringPiece("<body"), // Mozilla |
- StringPiece("<br"), // Mozilla |
- StringPiece("<p"), // Mozilla |
- StringPiece("<?xml") // Mozilla |
- }; |
- |
- while (data.length() > 0) { |
- if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures))) |
- return true; |
- |
- // If we cannot find "<!--", we fail sniffing this as HTML. |
- static const StringPiece kCommentBegins[] = {StringPiece("<!--")}; |
- if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins))) |
- break; |
- |
- // Search for --> and do SniffForHTML after that. If we can find the |
- // comment's end, we start HTML sniffing from there again. |
- static const char kEndComment[] = "-->"; |
- size_t offset = data.find(kEndComment); |
- if (offset == base::StringPiece::npos) |
- break; |
- |
- // Proceed to the index next to the ending comment (-->). |
- data.remove_prefix(offset + strlen(kEndComment)); |
- } |
- |
- return false; |
-} |
- |
-bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) { |
- // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for |
- // this signature. However, XML is case-sensitive. Don't we have to |
- // be more lenient only to block documents starting with the exact |
- // string <?xml rather than <?XML ? |
- // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser |
- // process, we should do single-thread checking here for the static |
- // initializer. |
- static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")}; |
- return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); |
-} |
- |
-bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) { |
- // TODO(dsjang): We have to come up with a better way to sniff |
- // JSON. However, even RE cannot help us that much due to the fact |
- // that we don't do full parsing. This DFA starts with state 0, and |
- // finds {, "/' and : in that order. We're avoiding adding a |
- // dependency on a regular expression library. |
- enum { |
- kStartState, |
- kLeftBraceState, |
- kLeftQuoteState, |
- kColonState, |
- kTerminalState, |
- } state = kStartState; |
- |
- size_t length = data.length(); |
- for (size_t i = 0; i < length && state < kColonState; ++i) { |
- const char c = data[i]; |
- if (c == ' ' || c == '\t' || c == '\r' || c == '\n') |
- continue; |
- |
- switch (state) { |
- case kStartState: |
- if (c == '{') |
- state = kLeftBraceState; |
- else |
- state = kTerminalState; |
- break; |
- case kLeftBraceState: |
- if (c == '\"' || c == '\'') |
- state = kLeftQuoteState; |
- else |
- state = kTerminalState; |
- break; |
- case kLeftQuoteState: |
- if (c == ':') |
- state = kColonState; |
- break; |
- case kColonState: |
- case kTerminalState: |
- NOTREACHED(); |
- break; |
- } |
- } |
- return state == kColonState; |
-} |
- |
-bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) { |
+bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) { |
// The purpose of this function is to try to see if there's any possibility |
// that this data can be JavaScript (superset of JS). Search for "var " for JS |
// detection. This is a real hack and should only be used for stats gathering. |