Index: webkit/child/site_isolation_policy.cc |
diff --git a/webkit/child/site_isolation_policy.cc b/webkit/child/site_isolation_policy.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..01f8d615089428572b1744cae7547c9cba2aa1bf |
--- /dev/null |
+++ b/webkit/child/site_isolation_policy.cc |
@@ -0,0 +1,503 @@ |
+// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "webkit/child/site_isolation_policy.h" |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+#include "base/metrics/histogram.h" |
+#include "base/strings/string_util.h" |
+#include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
+#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h" |
+#include "third_party/WebKit/public/platform/WebString.h" |
+#include "third_party/WebKit/public/platform/WebURL.h" |
+#include "third_party/WebKit/public/platform/WebURLRequest.h" |
+#include "third_party/WebKit/public/platform/WebURLResponse.h" |
+#include "third_party/WebKit/public/web/WebDocument.h" |
+#include "third_party/WebKit/public/web/WebFrame.h" |
+#include "third_party/WebKit/public/web/WebFrameClient.h" |
+#include "third_party/WebKit/public/web/WebSecurityOrigin.h" |
+ |
+using base::strncasecmp; |
+using WebKit::WebDocument; |
+using WebKit::WebString; |
+using WebKit::WebURL; |
+using WebKit::WebURLResponse; |
+using WebKit::WebURLRequest; |
+ |
+ |
+namespace webkit_glue { |
+ |
+// Definitions of SiteIsolationPolicy's static bookkeeping maps: |
+//  - id_target_map_: request identifier -> target type, filled in by |
+//    WillSendRequest(). |
+//  - url_responsedata_map_: response URL spec -> ResponseMetaData, filled in |
+//    by DidReceiveResponse() and consumed by DidReceiveData(). |
+//  - id_url_map_: request identifier -> response URL spec, written together |
+//    with url_responsedata_map_. |
+std::map<unsigned, WebURLRequest::TargetType> |
+ SiteIsolationPolicy::id_target_map_; |
+std::map<std::string, ResponseMetaData> |
+ SiteIsolationPolicy::url_responsedata_map_; |
+std::map<unsigned, std::string> SiteIsolationPolicy::id_url_map_; |
+ |
+void SiteIsolationPolicy::WillSendRequest( |
+    unsigned identifier, |
+    WebURLRequest::TargetType target_type) { |
+  // A redirected request re-enters here with an identifier we already track, |
+  // and by then its target_type has become TargetIsSubresource regardless of |
+  // what it originally was. Keep the first value recorded for the identifier. |
+  const bool already_tracked = |
+      id_target_map_.find(identifier) != id_target_map_.end(); |
+  if (already_tracked) |
+    return; |
+ |
+  id_target_map_[identifier] = target_type; |
+} |
+ |
+// Inspects the response headers for request |identifier| made by |frame| and, |
+// when the response looks like a cross-site document, stores a |
+// ResponseMetaData record for DidReceiveData() to consume. Returns early |
+// (recording nothing) when the frame is navigating, the frame origin is not |
+// http/https, the response is same-site, the MIME type is not one of the |
+// tracked document types, or a CORS header permits the access. |
+void SiteIsolationPolicy::DidReceiveResponse(WebKit::WebFrame* frame, |
+ unsigned identifier, |
+ const WebURLResponse& response) { |
+ DCHECK_EQ(id_target_map_.count(identifier),1U); |
+ |
+ UMA_HISTOGRAM_COUNTS("SiteIsolation.ALL", 1); |
Charlie Reis
2013/08/09 18:48:38
ALL -> AllResponses
nasko
2013/08/09 19:07:28
nit: TOTAL?
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
AllResponses seems more informative than TOTAL to
|
+ |
+ GURL response_url = response.url(); |
+ // The target type is consumed here: it is read and then removed from the |
+ // map, whether or not the response ends up being recorded below. |
+ WebURLRequest::TargetType target_type = id_target_map_[identifier]; |
+ id_target_map_.erase(identifier); |
+ |
+ // See if this is for navigation. If it is, don't block it, under |
+ // the assumption that we will put it in an appropriate process. |
+ if (IsFrameNavigating(frame)) { |
+ LOG(INFO) << "SiteIsolationPolicy.FrameInNavigation"; |
+ return; |
+ } |
+ |
+ GURL frame_origin(frame->document().securityOrigin().toString()); |
+ |
+ if (!IsBlockableScheme(frame_origin)) { |
+ LOG(INFO) << "SiteIsolationPolicy.NotNetworkScheme:" << frame_origin; |
+ return; |
+ } |
+ |
+ if (IsSameSite(frame_origin, response_url)) { |
+ LOG(INFO) << "SiteIsolationPolicy.SameSite:" << frame_origin << "," |
+ << response_url; |
+ return; |
+ } |
+ |
+ ResponseMetaData::CanonicalMimeType canonical_mime_type = |
+ GetCanonicalMimeType(response); |
+ |
+ if (canonical_mime_type == ResponseMetaData::Others) { |
+ LOG(INFO) << "SiteIsolationPolicy.mimetype:" << frame_origin << "," |
+ << response_url << ",[" << response.mimeType().utf8() << "]"; |
+ return; |
+ } |
+ |
+ // Every CORS request should have the Access-Control-Allow-Origin |
+ // header even if it is preceded by a pre-flight request. Therefore, |
+ // if this is a CORS request, it has this header. |
+ std::string access_control_origin = response.httpHeaderField( |
+ WebKit::WebString::fromUTF8("Access-Control-Allow-Origin")).utf8(); |
+ |
+ if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) { |
+ LOG(INFO) << "SiteIsolationPolicy.CorsIsSafe:"; |
+ return; |
+ } |
+ |
+ // Real XSD data collection starts from here. |
+ LOG(INFO) << "SiteIsolationPolicy.XSD:from header:" << canonical_mime_type << |
+ ":" << response_url; |
+ |
+ // TODO(dsjang): Apply X-Content-Type option here. |
+ ResponseMetaData resp_data; |
+ resp_data.frame_origin = frame_origin.spec(); |
+ resp_data.response_url = response_url.spec(); |
+ resp_data.request_identifier = identifier; |
+ resp_data.target_type = target_type; |
+ resp_data.canonical_mime_type = canonical_mime_type; |
+ resp_data.http_status_code = response.httpStatusCode(); |
+ |
+ // NOTE(review): the record is keyed by URL spec, so a second in-flight |
+ // response for the same URL overwrites the first one - confirm this is |
+ // acceptable for the statistics being gathered. |
+ url_responsedata_map_[resp_data.response_url] = resp_data; |
+ id_url_map_[identifier] = resp_data.response_url; |
+ |
+ return; |
+} |
+ |
+#define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \ |
+ UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".Blocked", 1); \ |
+ if (ok_status_code) { \ |
+ UMA_HISTOGRAM_ENUMERATION( \ |
+ ""BUCKET_PREFIX".Blocked.OKStatusCode", \ |
+ resp_data.target_type, \ |
+ WebURLRequest::TargetIsUnspecified + 1); \ |
+ } else { \ |
+ UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".Blocked.ErrorStatusCode", 1); \ |
+ } |
+ |
+#define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \ |
+ UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".NotBlocked", 1); \ |
+ if (is_sniffed_for_js) \ |
+ UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".NotBlocked.MaybeJS", 1); \ |
+ |
+#define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR,BUCKET_PREFIX) \ |
+ if (SNIFF_EXPR) { \ |
+ SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \ |
+ } else { \ |
+ SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \ |
+ } |
+ |
+// Sniffs the first chunk of body data received for |web_response_url| and |
+// records UMA counts for whether the response would have been blocked. Only |
+// responses that DidReceiveResponse() stored in url_responsedata_map_ are |
+// handled; the stored record is removed on first use, so later calls for the |
+// same URL return immediately. |
+void SiteIsolationPolicy::DidReceiveData(const char* data, |
+ int length, |
+ WebURL& web_response_url) { |
+ // We only record XSDs whose content is actually non-zero. |
Charlie Reis
2013/08/09 18:48:38
Make sure XSDs is spelled out somewhere (perhaps i
nasko
2013/08/09 19:07:28
How do we know the content is non-zero based on th
dsjang
2013/08/12 22:56:17
This means that we can't see zero responses here.
dsjang
2013/08/12 22:56:17
We don't know. What I meant was that DidReceiveDat
|
+ GURL response_url(web_response_url); |
+ |
+ std::string response_url_str = response_url.spec(); |
+ if (url_responsedata_map_.count(response_url_str) == 0) |
+ return; |
+ |
+ DCHECK_EQ(url_responsedata_map_.count(response_url_str), 1U); |
+ ResponseMetaData resp_data = url_responsedata_map_[response_url_str]; |
+ url_responsedata_map_.erase(response_url_str); |
+ |
+ // Record the length of the first received network packet to see if |
+ // it's enough for sniffing. |
+ UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length); |
+ |
+ // Record the entire number of responses with a specific mime |
Charlie Reis
2013/08/09 18:48:38
number of XSD reponses
dsjang
2013/08/12 22:56:17
Done.
|
+ // type(text/html, text/xml, etc). |
Charlie Reis
2013/08/09 18:48:38
nit: space after type
dsjang
2013/08/12 22:56:17
Done.
|
+ UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", |
+ resp_data.canonical_mime_type, |
+ ResponseMetaData::MaxCanonicalMimeType); |
+ |
+ // Blocking only happens when the content is sniffed for |
+ // HTML/JSON/XML. So if the status code is an error status code, it |
Charlie Reis
2013/08/09 18:48:38
it -> blocking it
dsjang
2013/08/12 22:56:17
Done.
|
+ // is not disruptive by the following reasons : 1) the blocked |
+ // content is not a binary object (such as an image) since it is |
+ // sniffed as a text document. 2) then, this blocking only breaks |
Charlie Reis
2013/08/09 18:48:38
nit: Either capitalize each sentence or use a semi
dsjang
2013/08/12 22:56:17
Done.
|
+ // the renderer behavior only if it is either JavaScript or |
+ // CSS. However, the renderer doesn't use the contents of JS/CSS |
+ // with unaffected status code(e.g, 404). *) the renderer is |
Charlie Reis
2013/08/09 18:48:38
nit: space after code
Also, why "*)" rather than "
|
+ // expected not to use the cross-site document content for purposes |
+ // other than JS/CSS (e.g, XHR). |
+ bool ok_status_code = !IsErrorStatusCode(resp_data.http_status_code); |
+ |
+ // This is only used for measuring false-negative analysis for |
Charlie Reis
2013/08/09 18:48:38
nit: "measuring" and "analysis" are redundant here
dsjang
2013/08/12 22:56:17
Done.
|
+ // non-blocked resources. |
+ // NOTE(review): |length| is an int but the sniffers take size_t; a negative |
+ // value would convert to a huge length - confirm callers never pass one. |
+ bool is_sniffed_for_js = SniffForJS(data, length); |
+ |
+ // Record the number of responses whose content is sniffed for what |
+ // its mime type claims it to be. For example, we apply a HTML |
+ // sniffer for a document tagged with text/html here. Whenever this |
+ // check becomes true, we'll block the response. |
+ switch (resp_data.canonical_mime_type) { |
+ case ResponseMetaData::HTML: |
+ SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length), |
+ "SiteIsolation.XSD.MimeType.HTML"); |
Charlie Reis
2013/08/09 18:48:38
I don't think we need "MimeType" in the stat name
dsjang
2013/08/12 22:56:17
Done.
|
+ break; |
+ case ResponseMetaData::XML: |
+ SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length), |
+ "SiteIsolation.XSD.MimeType.XML"); |
+ break; |
+ case ResponseMetaData::JSON: |
+ SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length), |
+ "SiteIsolation.XSD.MimeType.JSON"); |
+ break; |
+ case ResponseMetaData::Plain: |
+ // text/plain carries no format hint, so every document sniffer is tried |
+ // in turn and the first match decides the bucket. |
+ if (SniffForHTML(data, length)) { |
+ SITE_ISOLATION_POLICY_COUNT_BLOCK( |
+ "SiteIsolation.XSD.MimeType.Plain.HTML"); |
+ } else if (SniffForXML(data, length)) { |
+ SITE_ISOLATION_POLICY_COUNT_BLOCK( |
+ "SiteIsolation.XSD.MimeType.Plain.XML"); |
+ } else if (SniffForJSON(data, length)) { |
+ SITE_ISOLATION_POLICY_COUNT_BLOCK( |
+ "SiteIsolation.XSD.MimeType.Plain.JSON"); |
+ // NOTE(review): unlike the HTML/XML/JSON cases, nothing is counted as |
+ // NotBlocked here unless the data also looks like JS - confirm whether a |
+ // plain "else" was intended. |
+ } else if (is_sniffed_for_js) { |
+ SITE_ISOLATION_POLICY_COUNT_NOTBLOCK( |
+ "SiteIsolation.XSD.MimeType.Plain"); |
+ } |
+ break; |
+ default : |
+ DCHECK(false); |
Charlie Reis
2013/08/09 18:48:38
Use NOTREACHED() instead.
nasko
2013/08/09 19:07:28
nit: You can add << "message" to clarify why this
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
This is very useful. Thanks for letting me know th
|
+ break; |
+ } |
+} |
+ |
+// The counting macros are only used by DidReceiveData(); undefine them so |
+// they are not visible to the rest of the file. |
+#undef SITE_ISOLATION_POLICY_COUNT_BLOCK |
+#undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK |
+#undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT |
Charlie Reis
2013/08/09 18:48:38
Probably should undef these in reverse order, sinc
dsjang
2013/08/12 22:56:17
Done.
|
+ |
+// Drops all bookkeeping kept for request |identifier|: its target type and, |
+// if DidReceiveResponse() recorded one, the response metadata stored under |
+// the URL associated with the identifier. |
+void SiteIsolationPolicy::DidFinishResourceLoad(unsigned identifier) { |
+  id_target_map_.erase(identifier); |
+ |
+  // Look the identifier up with find() rather than operator[] so that an |
+  // unknown identifier does not insert an empty entry, and clean up only when |
+  // the identifier is actually present (the previous check was inverted). |
+  std::map<unsigned, std::string>::iterator url_it = |
+      id_url_map_.find(identifier); |
+  if (url_it == id_url_map_.end()) |
+    return; |
+ |
+  url_responsedata_map_.erase(url_it->second); |
+  id_url_map_.erase(url_it); |
+} |
+ |
+// Drops the response metadata stored for |web_response_url|, together with |
+// the per-identifier entries of the request that produced it. Does nothing |
+// when no metadata is stored for the URL. |
+void SiteIsolationPolicy::DidFinishResourceLoadForUrl( |
+    const WebKit::WebURL& web_response_url) { |
+  GURL response_url(web_response_url); |
+ |
+  // Clean up only when an entry exists (the previous check was inverted and |
+  // used operator[], which inserted an empty record for unknown URLs). |
+  std::map<std::string, ResponseMetaData>::iterator data_it = |
+      url_responsedata_map_.find(response_url.spec()); |
+  if (data_it == url_responsedata_map_.end()) |
+    return; |
+ |
+  // Copy the identifier out before erasing the entry that owns it. |
+  const unsigned request_identifier = data_it->second.request_identifier; |
+  url_responsedata_map_.erase(data_it); |
+  id_target_map_.erase(request_identifier); |
+  id_url_map_.erase(request_identifier); |
+} |
+ |
+// Maps the MIME type of |response| onto one of the coarse categories tracked |
+// by the policy (HTML, XML, JSON, Plain); every other MIME type yields |
+// ResponseMetaData::Others. |
+ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType( |
+    const WebURLResponse& response) { |
+  static const char TEXT_HTML[] = "text/html"; |
Charlie Reis
2013/08/09 18:48:38
Constants should be formatted as kTextHtml, etc.
dsjang
2013/08/12 22:56:17
Done.
|
+  static const char TEXT_XML[] = "text/xml"; |
+  static const char APP_RSS_XML[] = "application/rss+xml"; |
+  static const char APP_XML[] = "application/xml"; |
+  static const char APP_JSON[] = "application/json"; |
+  static const char TEXT_XJSON[] = "text/x-json"; |
+  static const char TEXT_JSON[] = "text/json"; |
+  // This was "text/json", which made the Plain branch below unreachable. |
+  static const char TEXT_PLAIN[] = "text/plain"; |
Charlie Reis
2013/08/09 18:48:38
Shouldn't this be text/plain?
dsjang
2013/08/12 22:56:17
Done.
|
+ |
+  const std::string mime_type = response.mimeType().utf8(); |
+ |
+  // Verbose-only: this runs for every classified response, so it must not be |
+  // logged at ERROR level. |
+  DVLOG(1) << "mimetype:" << mime_type; |
nasko
2013/08/09 19:07:28
Why is this TEXT_HTML there? Wouldn't it always pr
dsjang
2013/08/12 22:56:17
Done.
|
+ |
+  // These are a thorough list of the mime types crawled over the top |
+  // 50k sites related to HTML, XML, JSON, Plain. |
Charlie Reis
2013/08/09 18:48:38
This comment belongs above your constants.
dsjang
2013/08/12 22:56:17
Done.
|
+  if (LowerCaseEqualsASCII(mime_type, TEXT_HTML)) { |
+    return ResponseMetaData::HTML; |
+  } else if (LowerCaseEqualsASCII(mime_type, TEXT_XML) || |
nasko
2013/08/09 19:07:28
nit: Have you ordered these in order of decreasing
dsjang
2013/08/12 22:56:17
Done.
|
+             LowerCaseEqualsASCII(mime_type, APP_RSS_XML) || |
+             LowerCaseEqualsASCII(mime_type, APP_XML)) { |
+    return ResponseMetaData::XML; |
+  } else if (LowerCaseEqualsASCII(mime_type, APP_JSON) || |
+             LowerCaseEqualsASCII(mime_type, TEXT_XJSON) || |
+             LowerCaseEqualsASCII(mime_type, TEXT_JSON)) { |
+    return ResponseMetaData::JSON; |
+  } else if (LowerCaseEqualsASCII(mime_type, TEXT_PLAIN)) { |
+    return ResponseMetaData::Plain; |
+  } else { |
+    return ResponseMetaData::Others; |
+  } |
+} |
+ |
+bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { |
+  // Only http and https are covered. ftp:// is deliberately left out: FTP |
+  // provides no Content-Type header, which the policy depends on, so |
+  // documents from FTP servers cannot be protected. |
+  if (url.SchemeIs("http")) |
+    return true; |
+  return url.SchemeIs("https"); |
+} |
+ |
+// Returns true when |frame_origin| and |response_url| use the same scheme and |
+// belong to the same site, i.e. share the same effective domain (public |
+// suffix plus one). URLs without a registry-controlled domain are compared |
+// by host instead. |
+bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, |
Charlie Reis
2013/08/09 18:48:38
This seems like it should be modeled more after Si
|
+                                     const GURL& response_url) { |
+  if (frame_origin.scheme() != response_url.scheme()) |
+    return false; |
+ |
+  // Extract the effective domains (public suffix plus one) of the |
+  // urls. |
+ |
+  // TODO(dsjang): Is there any reason why we don't use |
nasko
2013/08/09 19:07:28
This will be good to iron out before we commit the
dsjang
2013/08/12 22:56:17
Done.
|
+  // net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES |
+  // instead of |
+  // net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES? If |
+  // we allow sites to use their private registries, they can use |
+  // "finer grained" sites than only using public ones. |
Charlie Reis
2013/08/09 18:48:38
Just change this to be a TODO to use INCLUDE_PRIVA
dsjang
2013/08/12 22:56:17
Done.
|
+  std::string frame_domain = |
+      net::registry_controlled_domains::GetDomainAndRegistry( |
+          frame_origin, |
+          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
+  std::string response_domain = |
+      net::registry_controlled_domains::GetDomainAndRegistry( |
+          response_url, |
+          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
+ |
+  // GetDomainAndRegistry() yields an empty string for hosts that have no |
+  // registry-controlled domain (for example IP addresses or "localhost"). |
+  // Two such hosts must not be treated as the same site merely because both |
+  // strings are empty, so fall back to comparing the hosts themselves. |
+  if (frame_domain.empty() || response_domain.empty()) |
+    return frame_origin.host() == response_url.host(); |
+ |
+  return frame_domain == response_domain; |
+} |
+ |
+bool SiteIsolationPolicy::IsFrameNavigating(WebKit::WebFrame* frame) { |
+  // While a navigation is in flight, frame->provisionalDataSource() is |
+  // non-null and stands for the request made for that navigation. Once the |
+  // network request is committed to the frame it becomes null again and the |
+  // committed data source is available through frame->dataSource(). This |
+  // is the most reliable way to detect whether the frame is in |
+  // navigation or not by far. |
Charlie Reis
2013/08/09 18:48:38
nit: drop "by far"
nasko
2013/08/09 19:07:28
Awesome comment! Thank you! I've learned something
dsjang
2013/08/12 22:56:17
Thank you here! I'm so glad that you got something
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
Done.
|
+  const bool has_provisional_source = frame->provisionalDataSource() != NULL; |
+  return has_provisional_source; |
+} |
+ |
+// Decides whether the Access-Control-Allow-Origin value in |
+// |access_control_origin| permits |frame_origin| to read the response. |
+// Returns false for an empty header, true for "*", and otherwise whether the |
+// origin named by the header is same-site with |frame_origin|. |
+// NOTE(review): |website_origin| is not used anywhere in the body - confirm |
+// whether it is still needed. |
+bool SiteIsolationPolicy::IsValidCorsHeaderSet( |
+ GURL& frame_origin, |
+ GURL& website_origin, |
+ std::string access_control_origin) { |
+ |
+ size_t access_control_origin_len = access_control_origin.size(); |
+ |
+ // TODO(dsjang): Is this actually true? The server seems to return |
Charlie Reis
2013/08/09 18:48:38
Can we resolve this TODO?
dsjang
2013/08/12 22:56:17
third_party/WebKit/Source/core/loader/CrossOriginA
|
+ // an empty string or "null". |
+ if (access_control_origin_len == 0) |
+ return false; |
+ |
+ // Many websites are sending back "\"*\"" instead of "*". This is |
+ // non-standard practice, and seems not supported by the |
+ // browser. Refer to |
+ // CrossOriginAccessControl::passesAccessControlCheck(). |
+ |
+ // TODO(dsjang): * is not allowed for the response from a request |
+ // with cookies. This allows for more than what the renderer will |
+ // eventually be able to receive, so we won't see illegal cross-site |
+ // documents allowed by this. We have to have a way to see if |
Charlie Reis
2013/08/09 18:48:38
typos: alllowed, t
nasko
2013/08/09 19:07:28
nit: Not sure what "t a way" stands for. To find a
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
Done.
|
+ // this response is from a cookie-tagged request or not in the |
+ // future. |
+ if (access_control_origin == "*") |
+ return true; |
+ |
+ // TODO(dsjang): The CORS spec only treats a fully specified URL, |
+ // except for "*", but many websites are using just a domain for |
+ // access_control_origin, and this is blocked by Webkit's CORS logic |
+ // here : CrossOriginAccessControl::passesAccessControlCheck() |
+ |
+ // We don't use Webkit's existing CORS policy implementation since |
+ // their policy works in terms of origins, not sites. For |
+ // example, when frame is sub.a.com and it is not allowed to access |
+ // a document with sub1.a.com. But under Site Isolation, it's |
+ // allowed. |
Charlie Reis
2013/08/09 18:48:38
Please move this comment to the top of this functi
dsjang
2013/08/12 22:56:17
Done.
|
+ |
+ // TODO(dsjang): examine createFromString()'s behavior for a URL |
+ // containing * in it. |
+ WebKit::WebSecurityOrigin cors_security_origin = |
Charlie Reis
2013/08/09 18:48:38
Why send this through WebSecurityOrigin if we're j
dsjang
2013/08/12 22:56:17
Done.
|
+ WebKit::WebSecurityOrigin::createFromString( |
+ WebKit::WebString::fromUTF8(access_control_origin)); |
+ GURL cors_origin(cors_security_origin.toString().utf8()); |
+ |
+ LOG(ERROR) << cors_security_origin.toString().utf8(); |
Charlie Reis
2013/08/09 18:48:38
We'll need to get rid of all the log statements be
dsjang
2013/08/12 22:56:17
Sure. I'll do that when the CL is ready for commit
|
+ return IsSameSite(frame_origin, cors_origin); |
+} |
+ |
+bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) { |
Charlie Reis
2013/08/09 18:48:38
Can you put a comment somewhere saying which funct
dsjang
2013/08/12 22:56:17
Done.
|
+  // TODO(dsjang): The content sniffer used by Chrome and Firefox are |
Charlie Reis
2013/08/09 18:48:38
This part of the comment isn't a TODO.  However, i
dsjang
2013/08/12 22:56:17
Done.
|
+  // using "<!--" as one of the HTML signatures, but it also appears |
+  // in valid JavaScript, considered as well-formed JS by the browser. |
+  // Since we do not want to block any JS, we exclude it from our HTML |
+  // signatures. This can weaken our document block policy, but we can |
+  // break less websites. |
+  // |
+  // Each entry notes where the signature comes from. |
+  const char* tag_prefixes[] = { |
+      "<!DOCTYPE html",  // HTML5 spec |
+      "<script",         // HTML5 spec, Mozilla |
+      "<html",           // HTML5 spec, Mozilla |
+      "<head",           // HTML5 spec, Mozilla |
+      "<iframe",         // Mozilla |
+      "<h1",             // Mozilla |
+      "<div",            // Mozilla |
+      "<font",           // Mozilla |
+      "<table",          // Mozilla |
+      "<a",              // Mozilla |
+      "<style",          // Mozilla |
+      "<title",          // Mozilla |
+      "<b",              // Mozilla |
+      "<body",           // Mozilla |
+      "<br",             // Mozilla |
+      "<p"               // Mozilla |
+  }; |
+  const size_t tag_prefix_count = arraysize(tag_prefixes); |
+  return DoSignatureMatching(data, length, tag_prefixes, tag_prefix_count); |
+} |
+ |
+bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) { |
+  // The XML declaration is the only signature checked (taken from Mozilla). |
+  const char* declaration_prefixes[] = { |
+      "<?xml"  // Mozilla |
+  }; |
+  const size_t prefix_count = arraysize(declaration_prefixes); |
+  return DoSignatureMatching(data, length, declaration_prefixes, prefix_count); |
+} |
+ |
+bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) { |
+  // TODO(dsjang): We have to come up with a better way to sniff |
+  // JSON. However, even RE cannot help us that much due to the fact |
+  // that we don't do full parsing. This DFA starts in the initial state and |
+  // looks for 1) {, 2) " or ', 3) : in that order. This is intentionally not |
Charlie Reis
2013/08/09 18:48:38
finds {, "/', and : in that order.
dsjang
2013/08/12 22:56:17
Done.
|
+  // using a regular expression library so that we can make the |
+  // trusted code base as small as possible. kDead is a dead state. |
Charlie Reis
2013/08/09 18:48:38
I'd change this sentence to just say that we're av
dsjang
2013/08/12 22:56:17
Done.
|
+  enum SniffState { kInitial, kLeftBrace, kLeftQuote, kColon, kDead }; |
nasko
2013/08/09 19:07:28
nit: I'd use less abbreviation in these, but it is
|
+ |
+  SniffState state = kInitial; |
+  for (size_t offset = 0; offset < length; ++offset) { |
+    // kColon (accepted) and kDead (rejected) are both final. |
+    if (state == kColon || state == kDead) |
+      break; |
+ |
+    const char current = data[offset]; |
+    // Whitespace is ignored in every state. |
+    const bool is_whitespace = current == ' ' || current == '\t' || |
+                               current == '\r' || current == '\n'; |
+    if (is_whitespace) |
+      continue; |
+ |
+    switch (state) { |
+      case kInitial: |
+        state = (current == '{') ? kLeftBrace : kDead; |
+        break; |
+      case kLeftBrace: |
+        state = (current == '\"' || current == '\'') ? kLeftQuote : kDead; |
+        break; |
+      case kLeftQuote: |
+        // Anything may follow the opening quote; only a colon advances. |
+        if (current == ':') |
Charlie Reis
2013/08/09 18:48:38
nit: No braces needed.
dsjang
2013/08/12 22:56:17
Done.
|
+          state = kColon; |
+        break; |
+      default: |
Charlie Reis
2013/08/09 18:48:38
NOTREACHED()
dsjang
2013/08/12 22:56:17
Done.
|
+        break; |
+    } |
+  } |
+  return state == kColon; |
+} |
+ |
+// Returns true when |data| (of |length| bytes), after skipping leading |
+// whitespace, starts with any of the |arr_size| strings in |signatures|. |
+// The comparison is case-insensitive. |
+bool SiteIsolationPolicy::DoSignatureMatching(const char* data, |
Charlie Reis
2013/08/09 18:48:38
MatchesSignature might be a better name.
|
+                                              size_t length, |
+                                              const char* signatures[], |
+                                              size_t arr_size) { |
+  // Skip the white characters at the beginning of the document. This does |
+  // not depend on the signature, so it is done once up front and without |
+  // modifying the input parameters. |
+  size_t start = 0; |
+  while (start < length) { |
+    const char c = data[start]; |
+    if (!(c == ' ' || c == '\r' || c == '\n' || c == '\t')) |
Charlie Reis
2013/08/09 18:48:38
nit: No braces needed on one-line clause.
dsjang
2013/08/12 22:56:17
Done.
|
+      break; |
+    ++start; |
nasko
2013/08/09 19:07:28
Why are you modifying the input parameter? You als
dsjang
2013/08/12 22:56:17
Thanks a lot!! Done.
|
+  } |
+  const char* const content = data + start; |
+  const size_t content_length = length - start; |
+ |
+  for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { |
+    const char* signature = signatures[sig_index]; |
+    const size_t signature_length = strlen(signature); |
+    // A signature longer than the remaining content cannot match, and |
+    // comparing it would read past the end of the buffer. |
+    if (content_length < signature_length) |
nasko
2013/08/09 19:07:28
Is there a reason why we can't put this before we
dsjang
2013/08/12 22:56:17
Done.
|
+      continue; |
+    if (!base::strncasecmp(signature, content, signature_length)) |
+      return true; |
+  } |
+  return false; |
+} |
+ |
+bool SiteIsolationPolicy::IsErrorStatusCode(int status_code) { |
Charlie Reis
2013/08/09 18:48:38
The only place you use this is when deciding if it
dsjang
2013/08/12 22:56:17
Done.
|
+  // Chrome only uses the content of a response with one of these |
+  // status codes for CSS/JavaScript. For images, Chrome just ignores |
+  // status code. Every code not listed here is reported as an error. |
+  switch (status_code) { |
+    case 200: |
+    case 201: |
+    case 202: |
+    case 203: |
+    case 206: |
+    case 300: |
+    case 301: |
+    case 302: |
+    case 303: |
+    case 305: |
+    case 306: |
+    case 307: |
+      return false; |
+    default: |
+      return true; |
+  } |
+} |
+ |
+// Crude heuristic: returns true when the byte sequence "var " occurs anywhere |
+// in the first |length| bytes of |data|. |
+bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) { |
+  // TODO(dsjang): This is a real hacking. The only purpose of this |
Charlie Reis
2013/08/09 18:48:38
hacking -> hack
dsjang
2013/08/12 22:56:17
Done.
|
+  // function is to try to see if there's any possibility that this |
+  // data can be JavaScript.(superset of JS). This function will be |
Charlie Reis
2013/08/09 18:48:38
nit: space, not period, after JavaScript
dsjang
2013/08/12 22:56:17
Done.
|
+  // removed for the production code. |
Charlie Reis
2013/08/09 18:48:38
will be removed once UMA stats are gathered.
dsjang
2013/08/12 22:56:17
Done.
|
+ |
+  // Search for "var " for JS detection. :-) |
+  static const char kVarToken[] = "var "; |
+  const size_t kVarTokenLength = sizeof(kVarToken) - 1; |
+ |
+  // |length| is unsigned, so the previous bound "length - 3" wrapped around |
+  // for inputs shorter than three bytes and the loop read far past the end |
+  // of the buffer. Bail out early and keep the bound free of subtraction. |
+  if (length < kVarTokenLength) |
+    return false; |
+ |
+  for (size_t i = 0; i + kVarTokenLength <= length; ++i) { |
+    if (strncmp(data + i, kVarToken, kVarTokenLength) == 0) |
Charlie Reis
2013/08/09 18:48:38
nit: No brace.
dsjang
2013/08/12 22:56:17
Done.
|
+      return true; |
+  } |
+  return false; |
+} |
+ |
+} // namespace webkit_glue |