webkit/child/site_isolation_policy.h - Issue 22254005: UMA data collector for cross-site documents(XSD)

Unified Diff: webkit/child/site_isolation_policy.h

Issue 22254005: UMA data collector for cross-site documents(XSD) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr

Patch Set: Comments & style have been updated. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: webkit/child/site_isolation_policy.h

diff --git a/webkit/child/site_isolation_policy.h b/webkit/child/site_isolation_policy.h

new file mode 100644

index 0000000000000000000000000000000000000000..1006f1bda623738b84e556f434367f312ca8fbc5

--- /dev/null

+++ b/webkit/child/site_isolation_policy.h

@@ -0,0 +1,181 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#ifndef WEBKIT_CHILD_SITE_ISOLATION_POLICY_H_

+#define WEBKIT_CHILD_SITE_ISOLATION_POLICY_H_

+#include <map>

+#include <utility>

+#include "base/gtest_prod_util.h"

+#include "third_party/WebKit/public/web/WebFrame.h"

+#include "third_party/WebKit/public/platform/WebURLRequest.h"

+#include "third_party/WebKit/public/platform/WebURLResponse.h"

+#include "webkit/child/webkit_child_export.h"

+using WebKit::WebFrame;

+using WebKit::WebURLResponse;

+using WebKit::WebURLRequest;

+namespace webkit_glue {

+struct ResponseMetaData {

Charlie Reis 2013/08/09 18:48:38 There are some compile errors for this on linux_cl

dsjang 2013/08/12 22:56:17 Done.

+ enum CanonicalMimeType {

+ HTML = 0,

+ XML = 1,

+ JSON = 2,

+ Plain = 3,

+ Others = 4,

+ MaxCanonicalMimeType,

+ };

+ // Returns a short human-readable name for |mime_type|. Values outside the

+ // named range (e.g. MaxCanonicalMimeType) yield "Unknown" instead of

+ // indexing past the end of the name table.

+ static const char* CanonicalMimeTypeToString(CanonicalMimeType mime_type) {

+ // Order must match the CanonicalMimeType enumerators (HTML = 0 ... Others = 4).

+ static const char* const kMimeTypeNames[] = {

+ "HTML", "XML", "JSON", "Plain", "Others"};

+ // Casting to an unsigned type also rejects negative values.

+ const size_t index = static_cast<size_t>(mime_type);

+ if (index >= sizeof(kMimeTypeNames) / sizeof(kMimeTypeNames[0]))

+ return "Unknown";

+ return kMimeTypeNames[index];

+ }

+ // Returns a short human-readable name for |target_type|. Values that fall

+ // outside the name table yield "Unknown" instead of reading past its end.

+ static const char* TargetTypeToString(WebURLRequest::TargetType target_type) {

+ // NOTE(review): the order presumably mirrors the WebURLRequest::TargetType

+ // enumerators, which are declared outside this file; confirm whenever that

+ // enum changes.

+ static const char* const kTargetTypeNames[] = {

+ "MainFrame", "Subframe", "Subresource", "StyleSheet", "Script",

+ "FontResource", "Image", "Object", "Media", "Worker", "SharedWorker",

+ "Prefetch", "Favicon", "XHR", "TextTrack", "Unspecified"};

+ // Casting to an unsigned type also rejects negative values.

+ const size_t index = static_cast<size_t>(target_type);

+ if (index >= sizeof(kTargetTypeNames) / sizeof(kTargetTypeNames[0]))

+ return "Unknown";

+ return kTargetTypeNames[index];

+ }

+ std::string frame_origin;

+ std::string response_url;

+ unsigned request_identifier;

+ WebURLRequest::TargetType target_type;

+ CanonicalMimeType canonical_mime_type;

+ int http_status_code;

+};

+class WEBKIT_CHILD_EXPORT SiteIsolationPolicy {

+ public:

+ // Register target_type information for identifier which identifies

nasko 2013/08/09 19:07:28 nit: |identifier|

dsjang 2013/08/12 22:56:17 Done.

+ // a specific request. In case HTTP redirection happens, this

+ // function is called multiple times for the same identifier. We do

+ // not depend on target_type to decide if a request is for

nasko 2013/08/09 19:07:28 nit: |target_type|

dsjang 2013/08/12 22:56:17 Done.

+ // navigation or not due to the redirection behavior.

+ static void WillSendRequest(unsigned identifier,

+ WebURLRequest::TargetType target_type);

+ // Register the header information of the response data. This

nasko 2013/08/09 19:07:28 nit: Registers

dsjang 2013/08/12 22:56:17 Done.

+ // function obtains the target_type set by WillSendRequest(). We

+ // have to make sure to call either

+ // SiteIsolationPolicy::DidFinishResourceLoad(identifier) or

+ // SiteIsolationPolicy::DidFinishResourceLoadForUrl(response.url())

+ // to free the bookkeeping data. TODO(dsjang): There's a possibility

nasko 2013/08/09 19:07:28 nit: TODO on new line.

dsjang 2013/08/12 22:56:17 Done.

+ // that two distinct responses (identified by different identifiers)

nasko 2013/08/09 19:07:28 What is an example of this? It seems strange enoug

dsjang 2013/08/12 22:56:17 Done.

+ // are from the same url, and this results in overwriting one of the

+ // two responses' bookkeeping data. This can deteriorate our UMA

+ // data. Even though we expect that this rarely happens, find a way

+ // to use identifier throughout the entire HTTP transaction here.

+ static void DidReceiveResponse(WebFrame* frame,

+ unsigned identifier,

+ const WebURLResponse& response);

+ // Examine the first network packet in case response_url is

+ // registered as a cross-site document by DidReceiveResponse().

+ // This records various kinds of UMA data stats.

+ static void DidReceiveData(const char* payload,

+ int length,

+ WebKit::WebURL& response_url);

+ // TODO(dsjang): Either of the following two functions must be

+ // called at the end of the

nasko 2013/08/09 19:07:28 nit: combine with next line, too much white space

dsjang 2013/08/12 22:56:17 Done.

+ // transaction. WebURLLoaderImpl::didReceivedData() is not a place

+ // where this can be called since it is not guaranteed that the

+ // function is called in case of network error. Instead,

+ // RenderFrameImpl::didFinishResourceLoad(identifier) and

+ // didFailLoad() are used for successful loading and failed loading,

+ // respectively.

+ static void DidFinishResourceLoad(unsigned identifier);

+ // This does the same thing as DidFinishResourceLoad(), but accepts

+ // response_url.

+ static void DidFinishResourceLoadForUrl(const WebKit::WebURL& response_url);

+private:

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, IsBlockableScheme);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, IsSameSite);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, IsValidCorsHeaderSet);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForHTML);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForXML);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForJSON);

+ FRIEND_TEST_ALL_PREFIXES(SiteIsolationPolicyTest, SniffForJS);

+ // Returns the representative mime type enum value of the mime type

+ // of response. For example, this returns the same value for all

+ // text/xml mime type families such as application/xml,

+ // application/rss+xml.

+ static ResponseMetaData::CanonicalMimeType GetCanonicalMimeType(

+ const WebURLResponse& response);

+ // Returns whether this scheme is a target of XSDP. This returns

Charlie Reis 2013/08/09 18:48:38 Note: we haven't defined XSDP anywhere in the code

nasko 2013/08/09 19:07:28 nit: This is the first time I see XSDP in this fil

dsjang 2013/08/12 22:56:17 Done.

+ // true only for http://* and https://* URLs.

+ static bool IsBlockableScheme(const GURL& frame_origin);

+ // Returns whether the two URLs belong to the same site.

+ static bool IsSameSite(const GURL& frame_origin, const GURL& response_url);

+ // Returns whether there's a valid CORS header for

nasko 2013/08/09 19:07:28 nit: Another instance of too much white space at t

dsjang 2013/08/12 22:56:17 Done.

+ // frame_origin. This is similar to

+ // CrossOriginAccessControl::passesAccessControlCheck(), but we use

+ // sites as our security domain, not origins. TODO(dsjang): this

nasko 2013/08/09 19:07:28 nit: TODO on new line

dsjang 2013/08/12 22:56:17 Done.

+ // must be improved to be more accurate to the actual CORS

+ // specification. For now, this works conservatively, allowing XSDs

+ // that are not allowed by actual CORS rules by ignoring 1)

+ // credentials and 2) methods. Preflight requests don't matter here

+ // since they are not used to decide whether to block a document or

+ // not on the client side.

+ static bool IsValidCorsHeaderSet(GURL& frame_origin,

+ GURL& website_origin,

+ std::string access_control_origin);

+ // Returns whether the given frame is navigating. When this is true,

Charlie Reis 2013/08/09 18:48:38 typo: navigating

dsjang 2013/08/12 22:56:17 Done.

+ // the frame is requesting a web page to be loaded.

+ static bool IsFrameNavigating(WebFrame* frame);

+ static bool SniffForHTML(const char* data, size_t length);

+ static bool SniffForXML(const char* data, size_t length);

+ static bool SniffForJSON(const char* data, size_t length);

+ static bool DoSignatureMatching(const char* data,

+ size_t length,

+ const char* signatures[],

+ size_t arr_size);

+ // TODO(dsjang): this is only needed for collecting UMA stat.

nasko 2013/08/09 19:07:28 Is there a reason for the TODO? Are you planning t

dsjang 2013/08/12 22:56:17 They are going to be deleted when deployed to do a

+ static bool SniffForJS(const char* data, size_t length);

+ // TODO(dsjang): this is only needed for collecting UMA stat.

+ static bool IsErrorStatusCode(int status_code);

+ // Maintain bookkeeping data between WillSendRequest() and

+ // DidReceiveResponse(). The key is the identifier of response.

+ static std::map<unsigned, WebURLRequest::TargetType> id_target_map_;

+ // Maintain data between DidReceiveResponse() and DidReceiveData().

+ // The key is the url of response. We can't use identifier anymore

+ // from here since that information is no longer available for

+ // DidReceiveData().

+ static std::map<std::string, ResponseMetaData> url_responsedata_map_;

+ // This maps the identifier of a response to the response's

+ // url. This is used to free ResponseMetaData in

+ // url_responsedata_map_, when DidReceiveData() is never called.

+ static std::map<unsigned, std::string> id_url_map_;

nasko 2013/08/09 19:07:28 If the second member is URL, why are you storing i

dsjang 2013/08/12 22:56:17 I thought that a string representation is more com

+ // Never needs to be constructed/destructed.

+ SiteIsolationPolicy() {}

+ ~SiteIsolationPolicy() {}

+ DISALLOW_COPY_AND_ASSIGN(SiteIsolationPolicy);

+};

+} // namespace webkit_glue

+#endif // WEBKIT_CHILD_SITE_ISOLATION_POLICY_H_

« content/renderer/render_frame_impl.cc ('K') | « content/renderer/render_frame_impl.cc ('k') | webkit/child/site_isolation_policy.cc » ('j') | webkit/child/site_isolation_policy.cc » ('J')