webkit/child/site_isolation_policy.h - Issue 22254005: UMA data collector for cross-site documents(XSD)

Unified Diff: webkit/child/site_isolation_policy.h

Issue 22254005: UMA data collector for cross-site documents(XSD) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr

Patch Set: "X-Content-Type-Options: nosniff" rule is added. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: webkit/child/site_isolation_policy.h

diff --git a/webkit/child/site_isolation_policy.h b/webkit/child/site_isolation_policy.h

new file mode 100644

index 0000000000000000000000000000000000000000..df98e4178adab7d327998c7f33cb3bf7d6596423

--- /dev/null

+++ b/webkit/child/site_isolation_policy.h

@@ -0,0 +1,219 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#ifndef WEBKIT_CHILD_SITE_ISOLATION_POLICY_H_

+#define WEBKIT_CHILD_SITE_ISOLATION_POLICY_H_

+#include <map>

+#include <utility>

+#include "base/gtest_prod_util.h"

+#include "third_party/WebKit/public/platform/WebURLRequest.h"

+#include "third_party/WebKit/public/platform/WebURLResponse.h"

+#include "third_party/WebKit/public/web/WebFrame.h"

+#include "webkit/child/webkit_child_export.h"

+using WebKit::WebFrame;

+using WebKit::WebURLResponse;

+using WebKit::WebURLRequest;

+namespace webkit_glue {

+// SiteIsolationPolicy implements the cross-site document blocking policy (XSDP)

+// for Site Isolation. XSDP will monitor network responses to a renderer and

+// block illegal responses so that a compromised renderer cannot steal private

+// information from other sites. For now SiteIsolationPolicy monitors responses

+// to gather various UMA stats to see the compatibility impact of actual

+// deployment of the policy. The UMA stat categories SiteIsolationPolicy gathers

+// are as follows:

+//

+// SiteIsolation.AllResponses : # of all network responses.

+// SiteIsolation.XSD.DataLength : the length of the first packet of a response.

+// SiteIsolation.XSD.MimeType (enum):

+// # of responses from other sites, tagged with a document mime type.

+// 0:HTML, 1:XML, 2:JSON, 3:Plain, 4:Others

+// SiteIsolation.XSD.[%MIMETYPE].Blocked :

+// blocked # of cross-site document responses grouped by sniffed MIME type.

+// SiteIsolation.XSD.[%MIMETYPE].Blocked.RenderableStatusCode :

+// # of responses with renderable status code,

+// out of SiteIsolation.XSD.[%MIMETYPE].Blocked.

+// SiteIsolation.XSD.[%MIMETYPE].Blocked.NonRenderableStatusCode :

+// # of responses with non-renderable status code,

+// out of SiteIsolation.XSD.[%MIMETYPE].Blocked.

+// SiteIsolation.XSD.[%MIMETYPE].NoSniffBlocked.RenderableStatusCode :

+// # of responses failed to be sniffed for its MIME type, but blocked by

+// "X-Content-Type-Options: nosniff" header, and with renderable status code

+// out of SiteIsolation.XSD.[%MIMETYPE].Blocked.

+// SiteIsolation.XSD.[%MIMETYPE].NoSniffBlocked.NonRenderableStatusCode :

+// # of responses failed to be sniffed for its MIME type, but blocked by

+// "X-Content-Type-Options: nosniff" header, and with non-renderable status

+// code out of SiteIsolation.XSD.[%MIMETYPE].Blocked.

+// SiteIsolation.XSD.[%MIMETYPE].NotBlocked :

+// # of responses, but not blocked due to failure of mime sniffing.

+// SiteIsolation.XSD.[%MIMETYPE].NotBlocked.MaybeJS :

+// # of responses that are plausibly sniffed to be JavaScript.

Charlie Reis 2013/08/13 21:09:03 nit: End with period, not question mark.

dsjang 2013/08/13 21:49:52 Done.

+// Per-response bookkeeping record; it is the value type stored in

+// UrlResponseMetaDataMap.

+struct ResponseMetaData {

+  // Document MIME type families. The explicit values match the 0..4 numbering

+  // listed for SiteIsolation.XSD.MimeType in the comment above, so they must

+  // not be renumbered. MaxCanonicalMimeType is a count, not a real type.

+  enum CanonicalMimeType {

+    HTML = 0,

+    XML = 1,

+    JSON = 2,

+    Plain = 3,

+    Others = 4,

+    MaxCanonicalMimeType,

+  };

+  // Returns the display name of |mime_type|. Any value that does not index

+  // into the name table (e.g. MaxCanonicalMimeType, or a negative value cast

+  // to the enum) yields "Others" instead of reading past the end of the array.

+  static const char* CanonicalMimeTypeToString(CanonicalMimeType mime_type) {

+    static const char* const kMimeTypeNames[] = {

+        "HTML", "XML", "JSON", "Plain", "Others"};

+    const unsigned kCount = sizeof(kMimeTypeNames) / sizeof(kMimeTypeNames[0]);

+    const unsigned index = static_cast<unsigned>(mime_type);

+    return kMimeTypeNames[index < kCount ? index : Others];

+  }

+  // Returns the display name of |target_type|. The table is indexed by the

+  // numeric value of WebURLRequest::TargetType, so its order has to mirror

+  // that enum (NOTE(review): confirm against WebURLRequest.h). Any value that

+  // does not index into the table yields the last entry, "Unspecified",

+  // instead of reading past the end of the array.

+  static const char* TargetTypeToString(WebURLRequest::TargetType target_type) {

+    static const char* const kTargetTypeNames[] = {

+        "MainFrame", "Subframe", "Subresource", "StyleSheet", "Script",

+        "FontResource", "Image", "Object", "Media", "Worker", "SharedWorker",

+        "Prefetch", "Favicon", "XHR", "TextTrack", "Unspecified"};

+    const unsigned kCount =

+        sizeof(kTargetTypeNames) / sizeof(kTargetTypeNames[0]);

+    const unsigned index = static_cast<unsigned>(target_type);

+    return kTargetTypeNames[index < kCount ? index : kCount - 1];

+  }

+  // Declared here, defined out of line.

+  ResponseMetaData();

+  std::string frame_origin;

+  GURL response_url;

+  unsigned request_identifier;

+  WebURLRequest::TargetType target_type;

+  CanonicalMimeType canonical_mime_type;

+  int http_status_code;

+  // NOTE(review): presumably set when the response carries the

+  // "X-Content-Type-Options: nosniff" header -- confirm in the .cc file.

+  bool no_sniff;

+};

+typedef std::map<unsigned, WebURLRequest::TargetType> TargetTypeMap;

+typedef std::map<GURL, ResponseMetaData> UrlResponseMetaDataMap;

+typedef std::map<unsigned, GURL> IdUrlMap;

+class WEBKIT_CHILD_EXPORT SiteIsolationPolicy {

+ public:

+ // Registers |target_type| for |identifier| which identifies a specific

+ // request. In case HTTP redirection happens, this function is called multiple

+ // times for the same identifier. We do not depend on |target_type| to decide

+ // if a request is for navigation or not due to the redirection behavior.

+ static void WillSendRequest(unsigned identifier,

+ WebURLRequest::TargetType target_type);

+ // Registers the header information of |response|. This function obtains the

+ // target_type set by |WillSendRequest|. We have to make sure to call either

+ // SiteIsolationPolicy::DidFinishResourceLoad(identifier) or

+ // SiteIsolationPolicy::DidFinishResourceLoadForUrl(response.url()) to free

+ // the bookkeeping data.

+ // TODO(dsjang): There's a possibility that two distinct responses (identified

+ // by different identifiers) are from the same url, and this results in

+ // overwriting one of the two responses' bookkeeping data. For example, when

+ // there are <iframe src="urlA" /> and <img src="urlA"> on the same page,

+ // there will be two calls of |DidReceiveResponse| with the same url, but

+ // different identifiers. This can deteriorate our UMA data. Even though we

+ // expect that this rarely happens, find a way to use identifier throughout

+ // the entire HTTP transaction here.

+ static void DidReceiveResponse(WebFrame* frame,

+ unsigned identifier,

+ const WebURLResponse& response);

+ // Examines the first network packet in case response_url is

+ // registered as a cross-site document by DidReceiveResponse().

+ // This records various kinds of UMA data stats. This function is

+ // called only if the length of received data is non-zero.

+ static void DidReceiveData(const char* payload,

+ int length,

+ WebKit::WebURL& response_url);

+ // TODO(dsjang): Either of the following two functions must be called at the

+ // end of the transaction. WebURLLoaderImpl::didReceivedData() is not a place

Charlie Reis 2013/08/13 21:09:03 nit: the transaction

dsjang 2013/08/13 21:49:52 Done.

+ // where this can be called since it is not guaranteed that the function is

+ // called in case of network error. Instead,

+ // RenderFrameImpl::didFinishResourceLoad(identifier) and didFailLoad() are

+ // used for successful loading and failed loading, respectively.

+ static void DidFinishResourceLoad(unsigned identifier);

+ // Does the same thing as DidFinishResourceLoad(), but accepts response_url.

+ static void DidFinishResourceLoadForUrl(const WebKit::WebURL& response_url);

+private:

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, IsBlockableScheme);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, IsSameSite);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, IsValidCorsHeaderSet);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForHTML);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForXML);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForJSON);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForJS);

+ // Returns the representative mime type enum value of the mime type of

+ // response. For example, this returns the same value for all text/xml mime

+ // type families such as application/xml, application/rss+xml.

+ static ResponseMetaData::CanonicalMimeType GetCanonicalMimeType(

+ const WebURLResponse& response);

+ // Returns whether this scheme is a target of cross-site document

+ // policy (XSDP). This returns true only for http://* and https://* urls.

+ static bool IsBlockableScheme(const GURL& frame_origin);

+ // Returns whether the two urls belong to the same site.

+ static bool IsSameSite(const GURL& frame_origin, const GURL& response_url);

+ // Returns whether there's a valid CORS header for frame_origin. This is

+ // similar to CrossOriginAccessControl::passesAccessControlCheck(), but we use

+ // sites as our security domain, not origins.

+ // TODO(dsjang): this must be improved to be more accurate to the actual CORS

+ // specification. For now, this works conservatively, allowing XSDs that are

+ // not allowed by actual CORS rules by ignoring 1) credentials and 2)

+ // methods. Preflight requests don't matter here since they are not used to

+ // decide whether to block a document or not on the client side.

+ static bool IsValidCorsHeaderSet(GURL& frame_origin,

+ GURL& website_origin,

+ std::string access_control_origin);

+ // Returns whether the given frame is navigating. When this is true, the frame

+ // is requesting a web page to be loaded.

+ static bool IsFrameNavigating(WebFrame* frame);

+ static bool SniffForHTML(const char* data, size_t length);

+ static bool SniffForXML(const char* data, size_t length);

+ static bool SniffForJSON(const char* data, size_t length);

+ static bool MatchesSignature(const char* data,

+ size_t length,

+ const char* signatures[],

+ size_t arr_size);

+ // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted

+ // when this class is used for actual blocking.

+ static bool SniffForJS(const char* data, size_t length);

+ // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted

+ // when this class is used for actual blocking.

+ static bool IsRenderableStatusCodeForDocument(int status_code);

+ // Maintain bookkeeping data between WillSendRequest() and

+ // DidReceiveResponse(). The key is the identifier of response.

+ static TargetTypeMap* GetIdTargetMap();

+ // Maintain data between DidReceiveResponse() and DidReceiveData(). The key

+ // is the url of response. We can't use identifier anymore from here since

+ // that information is no longer available for DidReceiveData().

+ static UrlResponseMetaDataMap* GetUrlResponseMetaDataMap();

+ // This maps the identifier of a response to the response's url. This is used

+ // to free the ResponseMetaData held by GetUrlResponseMetaDataMap() when

+ // DidReceiveData() is never called.

+ static IdUrlMap* GetIdUrlMap();

+ // Never needs to be constructed/destructed.

+ SiteIsolationPolicy() {}

+ ~SiteIsolationPolicy() {}

+ DISALLOW_COPY_AND_ASSIGN(SiteIsolationPolicy);

+};

+} // namespace webkit_glue

+#endif // WEBKIT_CHILD_SITE_ISOLATION_POLICY_H_

« no previous file with comments | « content/renderer/render_frame_impl.cc ('k') | webkit/child/site_isolation_policy.cc » ('j') | webkit/child/site_isolation_policy.cc » ('J')