Chromium Code Reviews| Index: webkit/child/site_isolation_policy.cc |
| diff --git a/webkit/child/site_isolation_policy.cc b/webkit/child/site_isolation_policy.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..01f8d615089428572b1744cae7547c9cba2aa1bf |
| --- /dev/null |
| +++ b/webkit/child/site_isolation_policy.cc |
| @@ -0,0 +1,503 @@ |
| +// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "webkit/child/site_isolation_policy.h" |
| + |
| +#include "base/basictypes.h" |
| +#include "base/logging.h" |
| +#include "base/metrics/histogram.h" |
| +#include "base/strings/string_util.h" |
| +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| +#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h" |
| +#include "third_party/WebKit/public/platform/WebString.h" |
| +#include "third_party/WebKit/public/platform/WebURL.h" |
| +#include "third_party/WebKit/public/platform/WebURLRequest.h" |
| +#include "third_party/WebKit/public/platform/WebURLResponse.h" |
| +#include "third_party/WebKit/public/web/WebDocument.h" |
| +#include "third_party/WebKit/public/web/WebFrame.h" |
| +#include "third_party/WebKit/public/web/WebFrameClient.h" |
| +#include "third_party/WebKit/public/web/WebSecurityOrigin.h" |
| + |
| +using base::strncasecmp; |
| +using WebKit::WebDocument; |
| +using WebKit::WebString; |
| +using WebKit::WebURL; |
| +using WebKit::WebURLResponse; |
| +using WebKit::WebURLRequest; |
| + |
| + |
| +namespace webkit_glue { |
| + |
| +// Request identifier -> target type, recorded by WillSendRequest() and |
| +// consumed (read and erased) by DidReceiveResponse(). |
| +std::map<unsigned, WebURLRequest::TargetType> |
| + SiteIsolationPolicy::id_target_map_; |
| +// Response URL spec -> metadata stored by DidReceiveResponse() for responses |
| +// that passed all of its early-return checks; consumed by DidReceiveData(). |
| +std::map<std::string, ResponseMetaData> |
| + SiteIsolationPolicy::url_responsedata_map_; |
| +// Request identifier -> response URL spec, i.e. the key under which that |
| +// request's metadata was stored in url_responsedata_map_. |
| +std::map<unsigned, std::string> SiteIsolationPolicy::id_url_map_; |
| + |
| +// Records |target_type| for the request |identifier| the first time that |
| +// identifier is seen. |
| +void SiteIsolationPolicy::WillSendRequest( |
| +    unsigned identifier, |
| +    WebURLRequest::TargetType target_type) { |
| +  // A redirect re-issues the request under the same identifier, but by then |
| +  // its target type has become TargetIsSubresource regardless of what it |
| +  // originally was. Keep the value stored for the original request. |
| +  if (id_target_map_.find(identifier) != id_target_map_.end()) |
| +    return; |
| + |
| +  id_target_map_[identifier] = target_type; |
| +} |
| + |
| +// Examines the response headers of request |identifier|, issued on behalf of |
| +// |frame|, and decides whether the response is a candidate cross-site |
| +// document (XSD). Returns early, storing nothing, when the frame is |
| +// navigating, the frame origin is not http(s), the response is same-site, |
| +// the MIME type is not HTML/XML/JSON/plain, or IsValidCorsHeaderSet() |
| +// accepts the Access-Control-Allow-Origin header. Otherwise the response |
| +// metadata is stored, keyed by response URL, for DidReceiveData() to consume. |
| +void SiteIsolationPolicy::DidReceiveResponse(WebKit::WebFrame* frame, |
| + unsigned identifier, |
| + const WebURLResponse& response) { |
| + DCHECK_EQ(id_target_map_.count(identifier),1U); |
| + |
| + UMA_HISTOGRAM_COUNTS("SiteIsolation.ALL", 1); |

Charlie Reis
2013/08/09 18:48:38
ALL -> AllResponses
nasko
2013/08/09 19:07:28
nit: TOTAL?
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
AllResponses seems more informative than TOTAL to
|
| + |
| + GURL response_url = response.url(); |
| + // Consume the target type recorded by WillSendRequest(); the map entry is |
| + // dropped here whether or not the response ends up being tracked. |
| + WebURLRequest::TargetType target_type = id_target_map_[identifier]; |
| + id_target_map_.erase(identifier); |
| + |
| + // See if this is for navigation. If it is, don't block it, under |
| + // the assumption that we will put it in an appropriate process. |
| + if (IsFrameNavigating(frame)) { |
| + LOG(INFO) << "SiteIsolationPolicy.FrameInNavigation"; |
| + return; |
| + } |
| + |
| + GURL frame_origin(frame->document().securityOrigin().toString()); |
| + |
| + if (!IsBlockableScheme(frame_origin)) { |
| + LOG(INFO) << "SiteIsolationPolicy.NotNetworkScheme:" << frame_origin; |
| + return; |
| + } |
| + |
| + if (IsSameSite(frame_origin, response_url)) { |
| + LOG(INFO) << "SiteIsolationPolicy.SameSite:" << frame_origin << "," |
| + << response_url; |
| + return; |
| + } |
| + |
| + ResponseMetaData::CanonicalMimeType canonical_mime_type = |
| + GetCanonicalMimeType(response); |
| + |
| + if (canonical_mime_type == ResponseMetaData::Others) { |
| + LOG(INFO) << "SiteIsolationPolicy.mimetype:" << frame_origin << "," |
| + << response_url << ",[" << response.mimeType().utf8() << "]"; |
| + return; |
| + } |
| + |
| + // Every CORS request should have the Access-Control-Allow-Origin |
| + // header even if it is preceded by a pre-flight request. Therefore, |
| + // if this is a CORS request, it has this header. |
| + std::string access_control_origin = response.httpHeaderField( |
| + WebKit::WebString::fromUTF8("Access-Control-Allow-Origin")).utf8(); |
| + |
| + if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) { |
| + LOG(INFO) << "SiteIsolationPolicy.CorsIsSafe:"; |
| + return; |
| + } |
| + |
| + // Real XSD data collection starts from here. |
| + LOG(INFO) << "SiteIsolationPolicy.XSD:from header:" << canonical_mime_type << |
| + ":" << response_url; |
| + |
| + // TODO(dsjang): Apply X-Content-Type option here. |
| + ResponseMetaData resp_data; |
| + resp_data.frame_origin = frame_origin.spec(); |
| + resp_data.response_url = response_url.spec(); |
| + resp_data.request_identifier = identifier; |
| + resp_data.target_type = target_type; |
| + resp_data.canonical_mime_type = canonical_mime_type; |
| + resp_data.http_status_code = response.httpStatusCode(); |
| + |
| + // DidReceiveData() looks the metadata up by URL; id_url_map_ remembers |
| + // which URL key belongs to this request identifier. |
| + url_responsedata_map_[resp_data.response_url] = resp_data; |
| + id_url_map_[identifier] = resp_data.response_url; |
| + |
| + return; |
| +} |
| + |
| +// Counts one response in the "<BUCKET_PREFIX>.Blocked" histogram. When the |
| +// local |ok_status_code| is true, the request's target type is also recorded |
| +// in "<BUCKET_PREFIX>.Blocked.OKStatusCode"; otherwise the response is |
| +// counted in "<BUCKET_PREFIX>.Blocked.ErrorStatusCode". BUCKET_PREFIX must be |
| +// a string literal (it is concatenated with literal suffixes), and the |
| +// expansion site must have locals named |ok_status_code| and |resp_data|. |
| +#define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \ |
| + UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".Blocked", 1); \ |
| + if (ok_status_code) { \ |
| + UMA_HISTOGRAM_ENUMERATION( \ |
| + ""BUCKET_PREFIX".Blocked.OKStatusCode", \ |
| + resp_data.target_type, \ |
| + WebURLRequest::TargetIsUnspecified + 1); \ |
| + } else { \ |
| + UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".Blocked.ErrorStatusCode", 1); \ |
| + } |
| + |
| +// Counts one response in "<BUCKET_PREFIX>.NotBlocked", and additionally in |
| +// "<BUCKET_PREFIX>.NotBlocked.MaybeJS" when the local |is_sniffed_for_js| is |
| +// true. Expands to two statements, so it must be used inside braces. |
| +#define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \ |
| + UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".NotBlocked", 1); \ |
| + if (is_sniffed_for_js) \ |
| + UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".NotBlocked.MaybeJS", 1); \ |
| + |
| +// Evaluates SNIFF_EXPR once and records the response as blocked when it is |
| +// true, or as not blocked when it is false, using the two macros above. |
| +#define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR,BUCKET_PREFIX) \ |
| + if (SNIFF_EXPR) { \ |
| + SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \ |
| + } else { \ |
| + SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \ |
| + } |
| + |
| +// Sniffs a chunk of body data (|data|, |length| bytes) received for |
| +// |web_response_url| and records UMA statistics on whether the response |
| +// would be blocked. Does nothing unless DidReceiveResponse() stored metadata |
| +// for this URL. The metadata is erased here, so a later call for the same |
| +// URL returns immediately until DidReceiveResponse() stores it again. |
| +void SiteIsolationPolicy::DidReceiveData(const char* data, |
| + int length, |
| + WebURL& web_response_url) { |
| + // We only record XSDs whose content is actually non-zero. |

Charlie Reis
2013/08/09 18:48:38
Make sure XSDs is spelled out somewhere (perhaps i
nasko
2013/08/09 19:07:28
How do we know the content is non-zero based on th
dsjang
2013/08/12 22:56:17
This means that we can't see zero responses here.
dsjang
2013/08/12 22:56:17
We don't know. What I meant was that DidReceiveDat
|
| + GURL response_url(web_response_url); |
| + |
| + std::string response_url_str = response_url.spec(); |
| + if (url_responsedata_map_.count(response_url_str) == 0) |
| + return; |
| + |
| + DCHECK_EQ(url_responsedata_map_.count(response_url_str), 1U); |
| + // Take a copy of the metadata and drop the map entry. |
| + ResponseMetaData resp_data = url_responsedata_map_[response_url_str]; |
| + url_responsedata_map_.erase(response_url_str); |
| + |
| + // Record the length of the first received network packet to see if |
| + // it's enough for sniffing. |
| + UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length); |
| + |
| + // Record the entire number of responses with a specific mime |

Charlie Reis
2013/08/09 18:48:38
number of XSD reponses
dsjang
2013/08/12 22:56:17
Done.
|
| + // type(text/html, text/xml, etc). |

Charlie Reis
2013/08/09 18:48:38
nit: space after type
dsjang
2013/08/12 22:56:17
Done.
|
| + UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", |
| + resp_data.canonical_mime_type, |
| + ResponseMetaData::MaxCanonicalMimeType); |
| + |
| + // Blocking only happens when the content is sniffed for |
| + // HTML/JSON/XML. So if the status code is an error status code, it |

Charlie Reis
2013/08/09 18:48:38
it -> blocking it
dsjang
2013/08/12 22:56:17
Done.
|
| + // is not disruptive by the following reasons : 1) the blocked |
| + // content is not a binary object (such as an image) since it is |
| + // sniffed as a text document. 2) then, this blocking only breaks |

Charlie Reis
2013/08/09 18:48:38
nit: Either capitalize each sentence or use a semi
dsjang
2013/08/12 22:56:17
Done.
|
| + // the renderer behavior only if it is either JavaScript or |
| + // CSS. However, the renderer doesn't use the contents of JS/CSS |
| + // with unaffected status code(e.g, 404). *) the renderer is |

Charlie Reis
2013/08/09 18:48:38
nit: space after code
Also, why "*)" rather than "
|
| + // expected not to use the cross-site document content for purposes |
| + // other than JS/CSS (e.g, XHR). |
| + bool ok_status_code = !IsErrorStatusCode(resp_data.http_status_code); |
| + |
| + // This is only used for measuring false-negative analysis for |

Charlie Reis
2013/08/09 18:48:38
nit: "measuring" and "analysis" are redundant here
dsjang
2013/08/12 22:56:17
Done.
|
| + // non-blocked resources. |
| + bool is_sniffed_for_js = SniffForJS(data, length); |
| + |
| + // Record the number of responses whose content is sniffed for what |
| + // its mime type claims it to be. For example, we apply a HTML |
| + // sniffer for a document tagged with text/html here. Whenever this |
| + // check becomes true, we'll block the response. |
| + switch (resp_data.canonical_mime_type) { |
| + case ResponseMetaData::HTML: |
| + SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length), |
| + "SiteIsolation.XSD.MimeType.HTML"); |

Charlie Reis
2013/08/09 18:48:38
I don't think we need "MimeType" in the stat name
dsjang
2013/08/12 22:56:17
Done.
|
| + break; |
| + case ResponseMetaData::XML: |
| + SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length), |
| + "SiteIsolation.XSD.MimeType.XML"); |
| + break; |
| + case ResponseMetaData::JSON: |
| + SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length), |
| + "SiteIsolation.XSD.MimeType.JSON"); |
| + break; |
| + case ResponseMetaData::Plain: |
| + // text/plain may carry any of the three document kinds, so each |
| + // sniffer is tried in turn. |
| + // NOTE(review): a text/plain response that matches none of the three |
| + // sniffers and is not sniffed as JS is counted in no bucket at all, |
| + // unlike the other cases, which always count NotBlocked -- confirm |
| + // this is intended. |
| + if (SniffForHTML(data, length)) { |
| + SITE_ISOLATION_POLICY_COUNT_BLOCK( |
| + "SiteIsolation.XSD.MimeType.Plain.HTML"); |
| + } else if (SniffForXML(data, length)) { |
| + SITE_ISOLATION_POLICY_COUNT_BLOCK( |
| + "SiteIsolation.XSD.MimeType.Plain.XML"); |
| + } else if (SniffForJSON(data, length)) { |
| + SITE_ISOLATION_POLICY_COUNT_BLOCK( |
| + "SiteIsolation.XSD.MimeType.Plain.JSON"); |
| + } else if (is_sniffed_for_js) { |
| + SITE_ISOLATION_POLICY_COUNT_NOTBLOCK( |
| + "SiteIsolation.XSD.MimeType.Plain"); |
| + } |
| + break; |
| + default : |
| + DCHECK(false); |

Charlie Reis
2013/08/09 18:48:38
Use NOTREACHED() instead.
nasko
2013/08/09 19:07:28
nit: You can add << "message" to clarify why this
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
This is very useful. Thanks for letting me know th
|
| + break; |
| + } |
| +} |
| + |
| +#undef SITE_ISOLATION_POLICY_COUNT_BLOCK |
| +#undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK |
| +#undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT |
|
Charlie Reis
2013/08/09 18:48:38
Probably should undef these in reverse order, sinc
dsjang
2013/08/12 22:56:17
Done.
|
| + |
| +// Drops all bookkeeping kept for the request |identifier| once its load has |
| +// finished: the pending target type and, if DidReceiveResponse() stored |
| +// metadata for it, the URL-keyed metadata and the identifier->URL link. |
| +void SiteIsolationPolicy::DidFinishResourceLoad(unsigned identifier) { |
| +  id_target_map_.erase(identifier); |
| + |
| +  // Clean up only when an entry actually exists. The previous check was |
| +  // inverted: it left tracked entries in the maps and, for untracked |
| +  // identifiers, inserted an empty URL via operator[] just to erase it. |
| +  std::map<unsigned, std::string>::iterator url_entry = |
| +      id_url_map_.find(identifier); |
| +  if (url_entry == id_url_map_.end()) |
| +    return; |
| + |
| +  url_responsedata_map_.erase(url_entry->second); |
| +  id_url_map_.erase(url_entry); |
| +} |
| + |
| +// Drops all bookkeeping associated with |web_response_url| once its load has |
| +// finished: the URL-keyed metadata and the per-identifier entries of the |
| +// request that produced it. Does nothing if no metadata is stored for the |
| +// URL. |
| +void SiteIsolationPolicy::DidFinishResourceLoadForUrl( |
| +    const WebKit::WebURL& web_response_url) { |
| +  GURL response_url(web_response_url); |
| + |
| +  // Clean up only when metadata exists. The previous check was inverted: it |
| +  // never removed stored metadata, and for unknown URLs it default-inserted |
| +  // an entry and erased by that entry's meaningless request identifier. |
| +  std::map<std::string, ResponseMetaData>::iterator entry = |
| +      url_responsedata_map_.find(response_url.spec()); |
| +  if (entry == url_responsedata_map_.end()) |
| +    return; |
| + |
| +  // Use the identifier before the entry holding it is erased. |
| +  id_target_map_.erase(entry->second.request_identifier); |
| +  id_url_map_.erase(entry->second.request_identifier); |
| +  url_responsedata_map_.erase(entry); |
| +} |
| + |
| +// Maps the MIME type reported by |response| onto one of the coarse document |
| +// categories the policy cares about (HTML, XML, JSON, Plain). Any other MIME |
| +// type yields ResponseMetaData::Others. The comparison ignores the case of |
| +// the reported MIME type. |
| +ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType( |
| +    const WebURLResponse& response) { |
| +  static const char TEXT_HTML[] = "text/html"; |

Charlie Reis
2013/08/09 18:48:38
Constants should be formatted as kTextHtml, etc.
dsjang
2013/08/12 22:56:17
Done.
|
| +  static const char TEXT_XML[] = "text/xml"; |
| +  static const char APP_RSS_XML[] = "application/rss+xml"; |
| +  static const char APP_XML[] = "application/xml"; |
| +  static const char APP_JSON[] = "application/json"; |
| +  static const char TEXT_XJSON[] = "text/x-json"; |
| +  static const char TEXT_JSON[] = "text/json"; |
| +  // This was "text/json", a copy of TEXT_JSON, so text/plain responses were |
| +  // never classified as Plain. |
| +  static const char TEXT_PLAIN[] = "text/plain"; |

Charlie Reis
2013/08/09 18:48:38
Shouldn't this be text/plain?
dsjang
2013/08/12 22:56:17
Done.
|
| + |
| +  const std::string mime_type = response.mimeType().utf8(); |
| + |
| +  LOG(ERROR) << "mimetype:" << mime_type << "==[" << TEXT_HTML << "]"; |

nasko
2013/08/09 19:07:28
Why is this TEXT_HTML there? Wouldn't it always pr
dsjang
2013/08/12 22:56:17
Done.
|
| + |
| +  // These are a thorough list of the mime types crawled over the top |
| +  // 50k sites related to HTML, XML, JSON, Plain. |

Charlie Reis
2013/08/09 18:48:38
This comment belongs above your constants.
dsjang
2013/08/12 22:56:17
Done.
|
| +  if (LowerCaseEqualsASCII(mime_type, TEXT_HTML)) { |
| +    return ResponseMetaData::HTML; |
| +  } else if (LowerCaseEqualsASCII(mime_type, TEXT_XML) || |

nasko
2013/08/09 19:07:28
nit: Have you ordered these in order of decreasing
dsjang
2013/08/12 22:56:17
Done.
|
| +             LowerCaseEqualsASCII(mime_type, APP_RSS_XML) || |
| +             LowerCaseEqualsASCII(mime_type, APP_XML)) { |
| +    return ResponseMetaData::XML; |
| +  } else if (LowerCaseEqualsASCII(mime_type, APP_JSON) || |
| +             LowerCaseEqualsASCII(mime_type, TEXT_XJSON) || |
| +             LowerCaseEqualsASCII(mime_type, TEXT_JSON)) { |
| +    return ResponseMetaData::JSON; |
| +  } else if (LowerCaseEqualsASCII(mime_type, TEXT_PLAIN)) { |
| +    return ResponseMetaData::Plain; |
| +  } else { |
| +    return ResponseMetaData::Others; |
| +  } |
| +} |
| + |
| +// Tells whether |url| uses a scheme the policy applies to, i.e. http or |
| +// https. |
| +bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { |
| +  // ftp:// is deliberately left out: FTP provides no Content-Type header, |
| +  // which the policy depends on, so documents from FTP servers cannot be |
| +  // protected. |
| +  if (url.SchemeIs("http")) |
| +    return true; |
| +  return url.SchemeIs("https"); |
| +} |
| + |
| +// Returns true when |frame_origin| and |response_url| belong to the same |
| +// site: identical scheme and identical effective domain (public suffix plus |
| +// one). Hosts that have no such domain are compared by exact host name. |
| +bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, |

Charlie Reis
2013/08/09 18:48:38
This seems like it should be modeled more after Si
|
| +                                     const GURL& response_url) { |
| +  if (frame_origin.scheme() != response_url.scheme()) |
| +    return false; |
| + |
| +  // Extract the effective domains (public suffix plus one) of the |
| +  // urls. |
| + |
| +  // TODO(dsjang): Is there any reason why we don't use |

nasko
2013/08/09 19:07:28
This will be good to iron out before we commit the
dsjang
2013/08/12 22:56:17
Done.
|
| +  // net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES |
| +  // instead of |
| +  // net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES? If |
| +  // we allow sites to use their private registries, they can use |
| +  // "finer grained" sites than only using public ones. |

Charlie Reis
2013/08/09 18:48:38
Just change this to be a TODO to use INCLUDE_PRIVA
dsjang
2013/08/12 22:56:17
Done.
|
| +  std::string frame_domain = |
| +      net::registry_controlled_domains::GetDomainAndRegistry( |
| +          frame_origin, |
| +          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| +  std::string response_domain = |
| +      net::registry_controlled_domains::GetDomainAndRegistry( |
| +          response_url, |
| +          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| + |
| +  // GetDomainAndRegistry() yields an empty string for hosts without a |
| +  // registry-controlled domain (IP addresses, single-label intranet names). |
| +  // Comparing two empty strings would make any two such hosts "same site", |
| +  // so fall back to comparing the hosts themselves. |
| +  if (frame_domain.empty() || response_domain.empty()) |
| +    return frame_origin.host() == response_url.host(); |
| + |
| +  return frame_domain == response_domain; |
| +} |
| + |
| +// Returns true while |frame| has a provisional data source, i.e. while a |
| +// navigation has been started but not yet committed. |frame| must not be |
| +// null; it is dereferenced unconditionally. |
| +bool SiteIsolationPolicy::IsFrameNavigating(WebKit::WebFrame* frame) { |
| + // When a navigation starts, frame->provisionalDataSource() is set |
| + // to a not-null value which stands for the request made for the |
| + // navigation. As soon as the network request is committed to the |
| + // frame, frame->provisionalDataSource() is converted to null, and |
| + // the committed data source is moved to frame->dataSource(). This |
| + // is the most reliable way to detect whether the frame is in |
| + // navigation or not. |

Charlie Reis
2013/08/09 18:48:38
nit: drop "by far"
nasko
2013/08/09 19:07:28
Awesome comment! Thank you! I've learned something
dsjang
2013/08/12 22:56:17
Thank you here! I'm so glad that you got something
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
Done.
|
| + return frame->provisionalDataSource() != NULL; |
| +} |
| + |
| +// Decides whether the Access-Control-Allow-Origin header value |
| +// |access_control_origin| permits |frame_origin| to read the response. |
| +// Returns false for an empty value, true for "*", and otherwise whether the |
| +// origin named in the header is the same site as |frame_origin|. |
| +// |website_origin| is not used by this function. |
| +bool SiteIsolationPolicy::IsValidCorsHeaderSet( |
| + GURL& frame_origin, |
| + GURL& website_origin, |
| + std::string access_control_origin) { |
| + |
| + size_t access_control_origin_len = access_control_origin.size(); |
| + |
| + // TODO(dsjang): Is this actually true? The server seems to return |

Charlie Reis
2013/08/09 18:48:38
Can we resolve this TODO?
dsjang
2013/08/12 22:56:17
third_party/WebKit/Source/core/loader/CrossOriginA
|
| + // an empty string or "null". |
| + if (access_control_origin_len == 0) |
| + return false; |
| + |
| + // Many websites are sending back "\"*\"" instead of "*". This is |
| + // non-standard practice, and seems not supported by the |
| + // browser. Refer to |
| + // CrossOriginAccessControl::passesAccessControlCheck(). |
| + |
| + // TODO(dsjang): * is not allowed for the response from a request |
| + // with cookies. This allows for more than what the renderer will |
| + // eventually be able to receive, so we won't see illegal cross-site |
| + // documents allowed by this. We have to have a way to see if |

Charlie Reis
2013/08/09 18:48:38
typos: alllowed, t
nasko
2013/08/09 19:07:28
nit: Not sure what "t a way" stands for. To find a
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
Done.
|
| + // this response is from a cookie-tagged request or not in the |
| + // future. |
| + if (access_control_origin == "*") |
| + return true; |
| + |
| + // TODO(dsjang): The CORS spec only treats a fully specified URL, |
| + // except for "*", but many websites are using just a domain for |
| + // access_control_origin, and this is blocked by Webkit's CORS logic |
| + // here : CrossOriginAccessControl::passesAccessControlCheck() |
| + |
| + // We don't use Webkit's existing CORS policy implementation since |
| + // their policy works in terms of origins, not sites. For |
| + // example, when frame is sub.a.com and it is not allowed to access |
| + // a document with sub1.a.com. But under Site Isolation, it's |
| + // allowed. |

Charlie Reis
2013/08/09 18:48:38
Please move this comment to the top of this functi
dsjang
2013/08/12 22:56:17
Done.
|
| + |
| + // TODO(dsjang): examine createFromString()'s behavior for a URL |
| + // containing * in it. |
| + WebKit::WebSecurityOrigin cors_security_origin = |

Charlie Reis
2013/08/09 18:48:38
Why send this through WebSecurityOrigin if we're j
dsjang
2013/08/12 22:56:17
Done.
|
| + WebKit::WebSecurityOrigin::createFromString( |
| + WebKit::WebString::fromUTF8(access_control_origin)); |
| + GURL cors_origin(cors_security_origin.toString().utf8()); |
| + |
| + LOG(ERROR) << cors_security_origin.toString().utf8(); |

Charlie Reis
2013/08/09 18:48:38
We'll need to get rid of all the log statements be
dsjang
2013/08/12 22:56:17
Sure. I'll do that when the CL is ready for commit
|
| + return IsSameSite(frame_origin, cors_origin); |
| +} |
| + |
| +// Returns true if |data| (|length| bytes), after leading whitespace, starts |
| +// with one of the HTML signatures listed below. The match itself is done by |
| +// DoSignatureMatching(). |
| +bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) { |

Charlie Reis
2013/08/09 18:48:38
Can you put a comment somewhere saying which funct
dsjang
2013/08/12 22:56:17
Done.
|
| + // TODO(dsjang): The content sniffer used by Chrome and Firefox are |

Charlie Reis
2013/08/09 18:48:38
This part of the comment isn't a TODO. However, i
dsjang
2013/08/12 22:56:17
Done.
|
| + // using "<!--" as one of the HTML signatures, but it also appears |
| + // in valid JavaScript, considered as well-formed JS by the browser. |
| + // Since we do not want to block any JS, we exclude it from our HTML |
| + // signatures. This can weaken our document block policy, but we can |
| + // break less websites. |
| + const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec |
| + "<script", // HTML5 spec, Mozilla |
| + "<html", // HTML5 spec, Mozilla |
| + "<head", // HTML5 spec, Mozilla |
| + "<iframe", // Mozilla |
| + "<h1", // Mozilla |
| + "<div", // Mozilla |
| + "<font", // Mozilla |
| + "<table", // Mozilla |
| + "<a", // Mozilla |
| + "<style", // Mozilla |
| + "<title", // Mozilla |
| + "<b", // Mozilla |
| + "<body", // Mozilla |
| + "<br", "<p" // Mozilla |
| + }; |
| + return DoSignatureMatching( |
| + data, length, html_signatures, arraysize(html_signatures)); |
| +} |
| + |
| +// Returns true if |data| (|length| bytes) is recognized as XML by |
| +// DoSignatureMatching() against the single signature "<?xml". |
| +bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) { |
| +  // The XML declaration is the only signature used (taken from Mozilla). |
| +  const char* signature_list[] = {"<?xml"}; |
| +  const size_t signature_count = arraysize(signature_list); |
| +  return DoSignatureMatching(data, length, signature_list, signature_count); |
| +} |
| + |
| +// Returns true if |data| (|length| bytes) looks like the start of a JSON |
| +// object. Ignoring spaces, tabs, CR and LF, the first character must be '{', |
| +// the next a single or double quote, and a ':' must follow somewhere later |
| +// in the buffer. Nothing beyond that is validated. |
| +bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) { |
| + // TODO(dsjang): We have to come up with a better way to sniff |
| + // JSON. However, even RE cannot help us that much due to the fact |
| + // that we don't do full parsing. This DFA starts with state 0, and |
| + // finds 1) {, 2) "or', 3) : in the order. This is intentionally not |

Charlie Reis
2013/08/09 18:48:38
finds {, "/', and : in that order.
dsjang
2013/08/12 22:56:17
Done.
|
| + // using a regular expression library so that we can make the |
| + // trusted code base as small as possible. State 4 is a dead state. |

Charlie Reis
2013/08/09 18:48:38
I'd change this sentence to just say that we're av
dsjang
2013/08/12 22:56:17
Done.
|
| + const int INIT_ST = 0; |

nasko
2013/08/09 19:07:28
nit: I'd use less abbreviation in these, but it is
|
| + const int LBRACE_ST = 1; |
| + const int LQUOTE_ST = 2; |
| + const int COLON_ST = 3; |
| + const int DEAD_ST = 4; |
| + |
| + int state = INIT_ST; |
| + // The loop ends as soon as the state reaches COLON_ST (accept) or DEAD_ST |
| + // (reject), since both are >= COLON_ST, or when the data runs out. |
| + for (size_t i = 0; i < length && state < COLON_ST; ++i, ++data) { |
| + const char c = *data; |
| + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') |
| + continue; |
| + |
| + switch (state) { |
| + case INIT_ST: |
| + if (c == '{') |
| + state = LBRACE_ST; |
| + else |
| + state = DEAD_ST; |
| + break; |
| + case LBRACE_ST: |
| + if (c == '\"' || c == '\'') |
| + state = LQUOTE_ST; |
| + else |
| + state = DEAD_ST; |
| + break; |
| + case LQUOTE_ST: |
| + // Every character is skipped until a ':' shows up; the key string |
| + // and its closing quote are not checked. |
| + if (c == ':') { |

Charlie Reis
2013/08/09 18:48:38
nit: No braces needed.
dsjang
2013/08/12 22:56:17
Done.
|
| + state = COLON_ST; |
| + } |
| + break; |
| + default: |

Charlie Reis
2013/08/09 18:48:38
NOTREACHED()
dsjang
2013/08/12 22:56:17
Done.
|
| + break; |
| + } |
| + } |
| + return state == COLON_ST; |
| +} |
| + |
| +// Returns true if |data| (|length| bytes), after skipping leading spaces, |
| +// tabs, CR and LF, begins with any of the |arr_size| NUL-terminated strings |
| +// in |signatures|. The comparison is case-insensitive. |
| +bool SiteIsolationPolicy::DoSignatureMatching(const char* data, |

Charlie Reis
2013/08/09 18:48:38
MatchesSignature might be a better name.
|
| + size_t length, |
| + const char* signatures[], |
| + size_t arr_size) { |
| + for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { |
| + const char* signature = signatures[sig_index]; |
| + size_t signature_length = strlen(signature); |
| + size_t i = 0; |
| + // Skip the white characters at the beginning of the document. |
| + // Note that |data| and |length| themselves are advanced and shrunk here, |
| + // so the skipping only has an effect for the first signature; on later |
| + // iterations |data| already points at the first non-whitespace byte (or |
| + // |length| is 0) and i stays 0. |
| + for (i = 0; i < length; ++i) { |
| + char c = *data; |
| + if (!(c == ' ' || c == '\r' || c == '\n' || c == '\t')) { |

Charlie Reis
2013/08/09 18:48:38
nit: No braces needed on one-line clause.
dsjang
2013/08/12 22:56:17
Done.
|
| + break; |
| + } |
| + ++data; |

nasko
2013/08/09 19:07:28
Why are you modifying the input parameter? You als
dsjang
2013/08/12 22:56:17
Thanks a lot!! Done.
|
| + } |
| + length = length - i; |
| + // Not enough bytes left to hold this signature; try the next one. |
| + if (length < signature_length) |

nasko
2013/08/09 19:07:28
Is there a reason why we can't put this before we
dsjang
2013/08/12 22:56:17
Done.
|
| + continue; |
| + if (!base::strncasecmp(signature, data, signature_length)) { |
| + return true; |
| + } |
| + } |
| + return false; |
| +} |
| + |
| +// Returns false when |status_code| is one of the status codes listed in |
| +// renderable_status_code below, and true for every other value. |
| +bool SiteIsolationPolicy::IsErrorStatusCode(int status_code) { |

Charlie Reis
2013/08/09 18:48:38
The only place you use this is when deciding if it
dsjang
2013/08/12 22:56:17
Done.
|
| + // Chrome only uses the content of a response with one of these |
| + // status codes for CSS/JavaScript. For images, Chrome just ignores |
| + // status code. |
| + const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302, |
| + 303, 305, 306, 307}; |
| + for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { |
| + if (renderable_status_code[i] == status_code) |
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| +// Returns true if the four-byte sequence "var " occurs anywhere within the |
| +// first |length| bytes of |data|. This is a deliberately crude heuristic for |
| +// "might be JavaScript", used only for statistics. |
| +bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) { |
| +  // TODO(dsjang): This is a real hacking. The only purpose of this |

Charlie Reis
2013/08/09 18:48:38
hacking -> hack
dsjang
2013/08/12 22:56:17
Done.
|
| +  // function is to try to see if there's any possibility that this |
| +  // data can be JavaScript.(superset of JS). This function will be |

Charlie Reis
2013/08/09 18:48:38
nit: space, not period, after JavaScript
dsjang
2013/08/12 22:56:17
Done.
|
| +  // removed for the production code. |

Charlie Reis
2013/08/09 18:48:38
will be removed once UMA stats are gathered.
dsjang
2013/08/12 22:56:17
Done.
|
| + |
| +  // Input shorter than "var " cannot contain it. Checking up front also |
| +  // keeps the unsigned |length - 3| below from wrapping around for |
| +  // length < 3, which would make the loop read far past the buffer. |
| +  if (length < 4) |
| +    return false; |
| + |
| +  // Search for "var " for JS detection. :-) |
| +  for (size_t i = 0; i < length - 3; ++i) { |
| +    if (strncmp(data, "var ", 4) == 0) { |

Charlie Reis
2013/08/09 18:48:38
nit: No brace.
dsjang
2013/08/12 22:56:17
Done.
|
| +      return true; |
| +    } |
| +    ++data; |
| +  } |
| +  return false; |
| +} |
| + |
| +} // namespace webkit_glue |