webkit/child/site_isolation_policy.cc - Issue 22254005: UMA data collector for cross-site documents(XSD)

Side by Side Diff: webkit/child/site_isolation_policy.cc

Issue 22254005: UMA data collector for cross-site documents(XSD) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr

Patch Set: Comments & style have been updated. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webkit/child/site_isolation_policy.h ('K') | « webkit/child/site_isolation_policy.h ('k') | webkit/child/site_isolation_policy_unittest.cc » ('j') | webkit/child/site_isolation_policy_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "webkit/child/site_isolation_policy.h"

	6

	7 #include "base/basictypes.h"

	8 #include "base/logging.h"

	9 #include "base/metrics/histogram.h"

	10 #include "base/strings/string_util.h"

	11 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"

	12 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"

	13 #include "third_party/WebKit/public/platform/WebString.h"

	14 #include "third_party/WebKit/public/platform/WebURL.h"

	15 #include "third_party/WebKit/public/platform/WebURLRequest.h"

	16 #include "third_party/WebKit/public/platform/WebURLResponse.h"

	17 #include "third_party/WebKit/public/web/WebDocument.h"

	18 #include "third_party/WebKit/public/web/WebFrame.h"

	19 #include "third_party/WebKit/public/web/WebFrameClient.h"

	20 #include "third_party/WebKit/public/web/WebSecurityOrigin.h"

	21

	22 using base::strncasecmp;

	23 using WebKit::WebDocument;

	24 using WebKit::WebString;

	25 using WebKit::WebURL;

	26 using WebKit::WebURLResponse;

	27 using WebKit::WebURLRequest;

	28

	29

	30 namespace webkit_glue {

	31

	// Target type recorded by WillSendRequest() for each request identifier.
	// The entry is read and erased when the response arrives or the load ends.
	32 std::map<unsigned, WebURLRequest::TargetType>

	33 SiteIsolationPolicy::id_target_map_;

	// Metadata for responses classified as cross-site documents, keyed by the
	// response URL spec. Written by DidReceiveResponse() and consumed by
	// DidReceiveData().
	34 std::map<std::string, ResponseMetaData>

	35 SiteIsolationPolicy::url_responsedata_map_;

	// Maps a request identifier to the response URL spec that is used as the
	// key in url_responsedata_map_.
	36 std::map<unsigned, std::string> SiteIsolationPolicy::id_url_map_;

	37

	38 void SiteIsolationPolicy::WillSendRequest(

	39 unsigned identifier,

	40 WebURLRequest::TargetType target_type) {

	41 // When identifier already exists in the map, it means that this

	42 // request has been redirected to issue another request. We don't

	43 // overwrite the existing target_type since it becomes

	44 // TargetIsSubresource no matter what the original target_type was.

	45 if (!id_target_map_.count(identifier))

	46 id_target_map_[identifier] = target_type;

	47 }

	48

	49 void SiteIsolationPolicy::DidReceiveResponse(WebKit::WebFrame* frame,

	50 unsigned identifier,

	51 const WebURLResponse& response) {

	52 DCHECK_EQ(id_target_map_.count(identifier),1U);

	53

	54 UMA_HISTOGRAM_COUNTS("SiteIsolation.ALL", 1);
	Charlie Reis 2013/08/09 18:48:38 ALL -> AllResponses ALL -> AllResponses nasko 2013/08/09 19:07:28 nit: TOTAL? nit: TOTAL? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > ALL -> AllResponses Done. dsjang 2013/08/12 22:56:17 AllResponses seems more informative than TOTAL to AllResponses seems more informative than TOTAL to me. Let me know if you have a better idea. On 2013/08/09 19:07:28, nasko wrote: Show quoted text > nit: TOTAL?
	55

	56 GURL response_url = response.url();

	57 WebURLRequest::TargetType target_type = id_target_map_[identifier];

	58 id_target_map_.erase(identifier);

	59

	60 // See if this is for navigation. If it is, don't block it, under

	61 // the assumption that we will put it in an appropriate process.

	62 if (IsFrameNavigating(frame)) {

	63 LOG(INFO) << "SiteIsolationPolicy.FrameInNavigation";

	64 return;

	65 }

	66

	67 GURL frame_origin(frame->document().securityOrigin().toString());

	68

	69 if (!IsBlockableScheme(frame_origin)) {

	70 LOG(INFO) << "SiteIsolationPolicy.NotNetworkScheme:" << frame_origin;

	71 return;

	72 }

	73

	74 if (IsSameSite(frame_origin, response_url)) {

	75 LOG(INFO) << "SiteIsolationPolicy.SameSite:" << frame_origin << ","

	76 << response_url;

	77 return;

	78 }

	79

	80 ResponseMetaData::CanonicalMimeType canonical_mime_type =

	81 GetCanonicalMimeType(response);

	82

	83 if (canonical_mime_type == ResponseMetaData::Others) {

	84 LOG(INFO) << "SiteIsolationPolicy.mimetype:" << frame_origin << ","

	85 << response_url << ",[" << response.mimeType().utf8() << "]";

	86 return;

	87 }

	88

	89 // Every CORS request should have the Access-Control-Allow-Origin

	90 // header even if it is preceded by a pre-flight request. Therefore,

	91 // if this is a CORS request, it has this header.

	92 std::string access_control_origin = response.httpHeaderField(

	93 WebKit::WebString::fromUTF8("Access-Control-Allow-Origin")).utf8();

	94

	95 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) {

	96 LOG(INFO) << "SiteIsolationPolicy.CorsIsSafe:";

	97 return;

	98 }

	99

	100 // Real XSD data collection starts from here.

	101 LOG(INFO) << "SiteIsolationPolicy.XSD:from header:" << canonical_mime_type <<

	102 ":" << response_url;

	103

	104 // TODO(dsjang): Apply X-Content-Type option here.

	105 ResponseMetaData resp_data;

	106 resp_data.frame_origin = frame_origin.spec();

	107 resp_data.response_url = response_url.spec();

	108 resp_data.request_identifier = identifier;

	109 resp_data.target_type = target_type;

	110 resp_data.canonical_mime_type = canonical_mime_type;

	111 resp_data.http_status_code = response.httpStatusCode();

	112

	113 url_responsedata_map_[resp_data.response_url] = resp_data;

	114 id_url_map_[identifier] = resp_data.response_url;

	115

	116 return;

	117 }

	118

	119 #define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \

	120 UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".Blocked", 1); \

	121 if (ok_status_code) { \

	122 UMA_HISTOGRAM_ENUMERATION( \

	123 ""BUCKET_PREFIX".Blocked.OKStatusCode", \

	124 resp_data.target_type, \

	125 WebURLRequest::TargetIsUnspecified + 1); \

	126 } else { \

	127 UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".Blocked.ErrorStatusCode", 1); \

	128 }

	129

	130 #define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \

	131 UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".NotBlocked", 1); \

	132 if (is_sniffed_for_js) \

	133 UMA_HISTOGRAM_COUNTS(""BUCKET_PREFIX".NotBlocked.MaybeJS", 1); \

	134

	135 #define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR,BUCKET_PREFIX) \

	136 if (SNIFF_EXPR) { \

	137 SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \

	138 } else { \

	139 SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \

	140 }

	141

	142 void SiteIsolationPolicy::DidReceiveData(const char* data,

	143 int length,

	144 WebURL& web_response_url) {

	145 // We only record XSDs whose content is actually non-zero.
	Charlie Reis 2013/08/09 18:48:38 Make sure XSDs is spelled out somewhere (perhaps i Make sure XSDs is spelled out somewhere (perhaps in the class-level comment of the .h file). Also, what do you mean by "content is actually non-zero"? The length is non-zero? Or that we didn't see zero responses? nasko 2013/08/09 19:07:28 How do we know the content is non-zero based on th How do we know the content is non-zero based on the URL? dsjang 2013/08/12 22:56:17 This means that we can't see zero responses here. This means that we can't see zero responses here. Moved this comment to .h file. On 2013/08/09 18:48:38, creis wrote: Show quoted text > Make sure XSDs is spelled out somewhere (perhaps in the class-level comment of > the .h file). > > Also, what do you mean by "content is actually non-zero"? The length is > non-zero? Or that we didn't see zero responses? dsjang 2013/08/12 22:56:17 We don't know. What I meant was that DidReceiveDat We don't know. What I meant was that DidReceiveData() is never called for zero responses. On 2013/08/09 19:07:28, nasko wrote: Show quoted text > How do we know the content is non-zero based on the URL?
	146 GURL response_url(web_response_url);

	147

	148 std::string response_url_str = response_url.spec();

	149 if (url_responsedata_map_.count(response_url_str) == 0)

	150 return;

	151

	152 DCHECK_EQ(url_responsedata_map_.count(response_url_str), 1U);

	153 ResponseMetaData resp_data = url_responsedata_map_[response_url_str];

	154 url_responsedata_map_.erase(response_url_str);

	155

	156 // Record the length of the first received network packet to see if

	157 // it's enough for sniffing.

	158 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length);

	159

	160 // Record the entire number of responses with a specific mime
	Charlie Reis 2013/08/09 18:48:38 number of XSD reponses number of XSD reponses dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > number of XSD reponses Done.
	161 // type(text/html, text/xml, etc).
	Charlie Reis 2013/08/09 18:48:38 nit: space after type nit: space after type dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: space after type Done.
	162 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",

	163 resp_data.canonical_mime_type,

	164 ResponseMetaData::MaxCanonicalMimeType);

	165

	166 // Blocking only happens when the content is sniffed for

	167 // HTML/JSON/XML. So if the status code is an error status code, it
	Charlie Reis 2013/08/09 18:48:38 it -> blocking it it -> blocking it dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > it -> blocking it Done.
	168 // is not disruptive for the following reasons: 1) the blocked

	169 // content is not a binary object (such as an image) since it is

	170 // sniffed as a text document. 2) then, this blocking only breaks
	Charlie Reis 2013/08/09 18:48:38 nit: Either capitalize each sentence or use a semi nit: Either capitalize each sentence or use a semicolon instead of a period between the numbered phrases. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: Either capitalize each sentence or use a semicolon instead of a period > between the numbered phrases. Done.
	171 // the renderer behavior only if it is either JavaScript or

	172 // CSS. However, the renderer doesn't use the contents of JS/CSS

	173 // with unaffected status code(e.g, 404). *) the renderer is
	Charlie Reis 2013/08/09 18:48:38 nit: space after code Also, why ")" rather than " nit: space after code Also, why ")" rather than "3)"?
	174 // expected not to use the cross-site document content for purposes

	175 // other than JS/CSS (e.g, XHR).

	176 bool ok_status_code = !IsErrorStatusCode(resp_data.http_status_code);

	177

	178 // This is only used for measuring false-negative analysis for
	Charlie Reis 2013/08/09 18:48:38 nit: "measuring" and "analysis" are redundant here nit: "measuring" and "analysis" are redundant here. Drop "measuring". dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: "measuring" and "analysis" are redundant here. Drop "measuring". Done.
	179 // non-blocked resources.

	180 bool is_sniffed_for_js = SniffForJS(data, length);

	181

	182 // Record the number of responses whose content is sniffed for what

	183 // its mime type claims it to be. For example, we apply a HTML

	184 // sniffer for a document tagged with text/html here. Whenever this

	185 // check becomes true, we'll block the response.

	186 switch (resp_data.canonical_mime_type) {

	187 case ResponseMetaData::HTML:

	188 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length),

	189 "SiteIsolation.XSD.MimeType.HTML");
	Charlie Reis 2013/08/09 18:48:38 I don't think we need "MimeType" in the stat name I don't think we need "MimeType" in the stat name (here and below). dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > I don't think we need "MimeType" in the stat name (here and below). Done.
	190 break;

	191 case ResponseMetaData::XML:

	192 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length),

	193 "SiteIsolation.XSD.MimeType.XML");

	194 break;

	195 case ResponseMetaData::JSON:

	196 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length),

	197 "SiteIsolation.XSD.MimeType.JSON");

	198 break;

	199 case ResponseMetaData::Plain:

	200 if (SniffForHTML(data, length)) {

	201 SITE_ISOLATION_POLICY_COUNT_BLOCK(

	202 "SiteIsolation.XSD.MimeType.Plain.HTML");

	203 } else if (SniffForXML(data, length)) {

	204 SITE_ISOLATION_POLICY_COUNT_BLOCK(

	205 "SiteIsolation.XSD.MimeType.Plain.XML");

	206 } else if (SniffForJSON(data, length)) {

	207 SITE_ISOLATION_POLICY_COUNT_BLOCK(

	208 "SiteIsolation.XSD.MimeType.Plain.JSON");

	209 } else if (is_sniffed_for_js) {

	210 SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(

	211 "SiteIsolation.XSD.MimeType.Plain");

	212 }

	213 break;

	214 default :

	215 DCHECK(false);
	Charlie Reis 2013/08/09 18:48:38 Use NOTREACHED() instead. Use NOTREACHED() instead. nasko 2013/08/09 19:07:28 nit: You can add << "message" to clarify why this nit: You can add << "message" to clarify why this is failing, which will print to the console. It saves a roundtrip to the source code location to figure out the cause of break. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Use NOTREACHED() instead. Done. dsjang 2013/08/12 22:56:17 This is very useful. Thanks for letting me know th This is very useful. Thanks for letting me know that. On 2013/08/09 19:07:28, nasko wrote: Show quoted text > nit: You can add << "message" to clarify why this is failing, which will print > to the console. It saves a roundtrip to the source code location to figure out > the cause of break.
	216 break;

	217 }

	218 }

	219

	220 #undef SITE_ISOLATION_POLICY_COUNT_BLOCK

	221 #undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK

	222 #undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT
	Charlie Reis 2013/08/09 18:48:38 Probably should undef these in reverse order, sinc Probably should undef these in reverse order, since SNIFF_AND_COUNT depends on the others. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Probably should undef these in reverse order, since SNIFF_AND_COUNT depends on > the others. Done.
	223

	224 void SiteIsolationPolicy::DidFinishResourceLoad(unsigned identifier) {

	225 id_target_map_.erase(identifier);

	226 if (!id_url_map_.count(identifier)) {

	227 url_responsedata_map_.erase(id_url_map_[identifier]);

	228 id_url_map_.erase(identifier);

	229 }

	230 }

	231

	232 void SiteIsolationPolicy::DidFinishResourceLoadForUrl(

	233 const WebKit::WebURL& web_response_url) {

	234 GURL response_url(web_response_url);

	235

	236 if (!url_responsedata_map_.count(response_url.spec())) {

	237 ResponseMetaData meta_data = url_responsedata_map_[response_url.spec()];

	238 url_responsedata_map_.erase(response_url.spec());

	239 id_target_map_.erase(meta_data.request_identifier);

	240 id_url_map_.erase(meta_data.request_identifier);

	241 }

	242 }

	243

	244 ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType(

	245 const WebURLResponse& response) {

	246 static const char TEXT_HTML[] = "text/html";
	Charlie Reis 2013/08/09 18:48:38 Constants should be formatted as kTextHtml, etc. Constants should be formatted as kTextHtml, etc. See content_constants.cc for examples. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Constants should be formatted as kTextHtml, etc. See content_constants.cc for > examples. Done.
	247 static const char TEXT_XML[] = "text/xml";

	248 static const char APP_RSS_XML[] = "application/rss+xml";

	249 static const char APP_XML[] = "application/xml";

	250 static const char APP_JSON[] = "application/json";

	251 static const char TEXT_XJSON[] = "text/x-json";

	252 static const char TEXT_JSON[] = "text/json";

	253 static const char TEXT_PLAIN[] = "text/json";
	Charlie Reis 2013/08/09 18:48:38 Shouldn't this be text/plain? Shouldn't this be text/plain? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Shouldn't this be text/plain? Done.
	254

	255 const std::string mime_type = response.mimeType().utf8();

	256

	257 LOG(ERROR) << "mimetype:" << mime_type << "==[" << TEXT_HTML << "]";
	nasko 2013/08/09 19:07:28 Why is this TEXT_HTML there? Wouldn't it always pr Why is this TEXT_HTML there? Wouldn't it always print the same? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > Why is this TEXT_HTML there? Wouldn't it always print the same? Done.
	258

	259 // This is a thorough list of the mime types crawled over the top

	260 // 50k sites related to HTML, XML, JSON, Plain.
	Charlie Reis 2013/08/09 18:48:38 This comment belongs above your constants. This comment belongs above your constants. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > This comment belongs above your constants. Done.
	261 if (LowerCaseEqualsASCII(mime_type, TEXT_HTML)) {

	262 return ResponseMetaData::HTML;

	263 } else if (LowerCaseEqualsASCII(mime_type, TEXT_XML) \|\|
	nasko 2013/08/09 19:07:28 nit: Have you ordered these in order of decreasing nit: Have you ordered these in order of decreasing probability of occurrence? I'd do it to have higher likelihood of short circuiting the if statement. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > nit: Have you ordered these in order of decreasing probability of occurrence? > I'd do it to have higher likelihood of short circuiting the if statement. Done.
	264 LowerCaseEqualsASCII(mime_type, APP_RSS_XML) \|\|

	265 LowerCaseEqualsASCII(mime_type, APP_XML)) {

	266 return ResponseMetaData::XML;

	267 } else if (LowerCaseEqualsASCII(mime_type, APP_JSON) \|\|

	268 LowerCaseEqualsASCII(mime_type, TEXT_XJSON) \|\|

	269 LowerCaseEqualsASCII(mime_type, TEXT_JSON)) {

	270 return ResponseMetaData::JSON;

	271 } else if (LowerCaseEqualsASCII(mime_type, TEXT_PLAIN)) {

	272 return ResponseMetaData::Plain;

	273 } else {

	274 return ResponseMetaData::Others;

	275 }

	276 }

	277

	278 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) {

	279 // We exclude ftp:// from here. FTP doesn't provide a Content-Type

	280 // header which our policy depends on, so we cannot protect any

	281 // document from FTP servers.

	282 return url.SchemeIs("http") \|\| url.SchemeIs("https");

	283 }

	284

	285 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin,
	Charlie Reis 2013/08/09 18:48:38 This seems like it should be modeled more after Si This seems like it should be modeled more after SiteInstance::IsSameWebSite. You can ignore GetEffectiveURL, but what about URLs that aren't valid? Also, SameDomainOrHost seems like a more convenient check than calling GetDomainAndRegistry twice. I don't think you need an equivalent to IsURLSameAsAnySiteInstance, since the kChromeUI* URLs aren't visible in the renderer.
	286 const GURL& response_url) {

	287 if (frame_origin.scheme() != response_url.scheme())

	288 return false;

	289

	290 // Extract the effective domains (public suffix plus one) of the

	291 // urls.

	292

	293 // TODO(dsjang): Is there any reason why we don't use
	nasko 2013/08/09 19:07:28 This will be good to iron out before we commit the This will be good to iron out before we commit the code. Changing UMA stats semantics later can bring confusion when looking over the full collection. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > This will be good to iron out before we commit the code. Changing UMA stats > semantics later can bring confusion when looking over the full collection. Done.
	294 // net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES

	295 // instead of

	296 // net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES? If

	297 // we allow sites to use their private registries, they can use

	298 // "finer grained" sites than only using public ones.
	Charlie Reis 2013/08/09 18:48:38 Just change this to be a TODO to use INCLUDE_PRIVA Just change this to be a TODO to use INCLUDE_PRIVATE_REGISTRIES when http://crbug.com/7988 is fixed. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Just change this to be a TODO to use INCLUDE_PRIVATE_REGISTRIES when > http://crbug.com/7988 is fixed. Done.
	299 std::string frame_domain =

	300 net::registry_controlled_domains::GetDomainAndRegistry(

	301 frame_origin,

	302 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

	303 std::string response_domain =

	304 net::registry_controlled_domains::GetDomainAndRegistry(

	305 response_url,

	306 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

	307

	308 return frame_domain == response_domain;

	309 }

	310

	311 bool SiteIsolationPolicy::IsFrameNavigating(WebKit::WebFrame* frame) {

	312 // When a navigation starts, frame->provisionalDataSource() is set

	313 // to a not-null value which stands for the request made for the

	314 // navigation. As soon as the network request is committed to the

	315 // frame, frame->provisionalDataSource() is converted to null, and

	316 // the committed data source is moved to frame->dataSource(). This

	317 // is the most reliable way to detect whether the frame is in

	318 // navigation or not by far.
	Charlie Reis 2013/08/09 18:48:38 nit: drop "by far" nit: drop "by far" nasko 2013/08/09 19:07:28 Awesome comment! Thank you! I've learned something Awesome comment! Thank you! I've learned something new : ). dsjang 2013/08/12 22:56:17 Thank you here! I'm so glad that you got something Thank you here! I'm so glad that you got something from reviewing my code. On 2013/08/09 19:07:28, nasko wrote: Show quoted text > Awesome comment! Thank you! I've learned something new : ). dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > Awesome comment! Thank you! I've learned something new : ). Done. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: drop "by far" Done.
	319 return frame->provisionalDataSource() != NULL;

	320 }

	321

	322 bool SiteIsolationPolicy::IsValidCorsHeaderSet(

	323 GURL& frame_origin,

	324 GURL& website_origin,

	325 std::string access_control_origin) {

	326

	327 size_t access_control_origin_len = access_control_origin.size();

	328

	329 // TODO(dsjang): Is this actually true? The server seems to return
	Charlie Reis 2013/08/09 18:48:38 Can we resolve this TODO? Can we resolve this TODO? dsjang 2013/08/12 22:56:17 third_party/WebKit/Source/core/loader/CrossOriginA third_party/WebKit/Source/core/loader/CrossOriginAccessControl.cpp is not using this heuristic. So removed this line. On 2013/08/09 18:48:38, creis wrote: Show quoted text > Can we resolve this TODO?
	330 // an empty string or "null".

	331 if (access_control_origin_len == 0)

	332 return false;

	333

	334 // Many websites are sending back "\"\"" instead of "". This is

	335 // non-standard practice, and seems not supported by the

	336 // browser. Refer to

	337 // CrossOriginAccessControl::passesAccessControlCheck().

	338

	339 // TODO(dsjang): * is not allowed for the response from a request

	340 // with cookies. This allows for more than what the renderer will

	341 // eventually be able to receive, so we won't see illegal cross-site

	342 // documents allowed by this. We have to have a way to see if
	Charlie Reis 2013/08/09 18:48:38 typos: alllowed, t typos: alllowed, t nasko 2013/08/09 19:07:28 nit: Not sure what "t a way" stands for. To find a nit: Not sure what "t a way" stands for. To find a way? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > nit: Not sure what "t a way" stands for. To find a way? Done. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > typos: alllowed, t Done.
	343 // this response is from a cookie-tagged request or not in the

	344 // future.

	345 if (access_control_origin == "*")

	346 return true;

	347

	348 // TODO(dsjang): The CORS spec only treats a fully specified URL,

	349 // except for "*", but many websites are using just a domain for

	350 // access_control_origin, and this is blocked by Webkit's CORS logic

	351 // here : CrossOriginAccessControl::passesAccessControlCheck()

	352

	353 // We don't use Webkit's existing CORS policy implementation since

	354 // their policy works in terms of origins, not sites. For

	355 // example, when frame is sub.a.com and it is not allowed to access

	356 // a document with sub1.a.com. But under Site Isolation, it's

	357 // allowed.
	Charlie Reis 2013/08/09 18:48:38 Please move this comment to the top of this functi Please move this comment to the top of this function. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Please move this comment to the top of this function. Done.
	358

	359 // TODO(dsjang): examine createFromString()'s behavior for a URL

	360 // containing * in it.

	361 WebKit::WebSecurityOrigin cors_security_origin =
	Charlie Reis 2013/08/09 18:48:38 Why send this through WebSecurityOrigin if we're j Why send this through WebSecurityOrigin if we're just converting it back to a GURL? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Why send this through WebSecurityOrigin if we're just converting it back to a > GURL? Done.
	362 WebKit::WebSecurityOrigin::createFromString(

	363 WebKit::WebString::fromUTF8(access_control_origin));

	364 GURL cors_origin(cors_security_origin.toString().utf8());

	365

	366 LOG(ERROR) << cors_security_origin.toString().utf8();
	Charlie Reis 2013/08/09 18:48:38 We'll need to get rid of all the log statements be We'll need to get rid of all the log statements before committing. dsjang 2013/08/12 22:56:17 Sure. I'll do that when the CL is ready for commit Sure. I'll do that when the CL is ready for committing. On 2013/08/09 18:48:38, creis wrote: Show quoted text > We'll need to get rid of all the log statements before committing.
	367 return IsSameSite(frame_origin, cors_origin);

	368 }

	369

	370 bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) {
	Charlie Reis 2013/08/09 18:48:38 Can you put a comment somewhere saying which funct Can you put a comment somewhere saying which function this code is based on? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > Can you put a comment somewhere saying which function this code is based on? Done.
	371 // TODO(dsjang): The content sniffer used by Chrome and Firefox are
	Charlie Reis 2013/08/09 18:48:38 This part of the comment isn't a TODO. However, i This part of the comment isn't a TODO. However, it may be worth adding a TODO to parameterize the original sniffing logic based on whether to include <!-- as a valid signature or not, so that we can remove this function. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > This part of the comment isn't a TODO. However, it may be worth adding a TODO > to parameterize the original sniffing logic based on whether to include <!-- as > a valid signature or not, so that we can remove this function. Done.
	372 // using "<!--" as one of the HTML signatures, but it also appears

	373 // in valid JavaScript, considered as well-formed JS by the browser.

	374 // Since we do not want to block any JS, we exclude it from our HTML

	375 // signatures. This can weaken our document block policy, but we can

	376 // break fewer websites.

	377 const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec

	378 "<script", // HTML5 spec, Mozilla

	379 "<html", // HTML5 spec, Mozilla

	380 "<head", // HTML5 spec, Mozilla

	381 "<iframe", // Mozilla

	382 "<h1", // Mozilla

	383 "<div", // Mozilla

	384 "<font", // Mozilla

	385 "<table", // Mozilla

	386 "<a", // Mozilla

	387 "<style", // Mozilla

	388 "<title", // Mozilla

	389 "<b", // Mozilla

	390 "<body", // Mozilla

	391 "<br", "<p" // Mozilla

	392 };

	393 return DoSignatureMatching(

	394 data, length, html_signatures, arraysize(html_signatures));

	395 }

	396

	397 bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) {

	398 const char* xml_signatures[] = {"<?xml" // Mozilla

	399 };

	400 return DoSignatureMatching(

	401 data, length, xml_signatures, arraysize(xml_signatures));

	402 }

	403

	404 bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) {

	405 // TODO(dsjang): We have to come up with a better way to sniff

	406 // JSON. However, even RE cannot help us that much due to the fact

	407 // that we don't do full parsing. This DFA starts with state 0, and

	408 // finds 1) {, 2) "or', 3) : in the order. This is intentionally not
	Charlie Reis 2013/08/09 18:48:38 finds {, "/', and : in that order. finds {, "/', and : in that order. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > finds {, "/', and : in that order. Done.
	409 // using a regular expression library so that we can make the

	410 // trusted code base as small as possible. State 4 is a dead state.
	Charlie Reis 2013/08/09 18:48:38 I'd change this sentence to just say that we're av I'd change this sentence to just say that we're avoiding adding a dependency on a regular expression library. The trusted computing base argument could go either way: a library would be more vetted than this hand-coded parser but also larger and more complex. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > I'd change this sentence to just say that we're avoiding adding a dependency on > a regular expression library. The trusted computing base argument could go > either way: a library would be more vetted than this hand-coded parser but also > larger and more complex. Done.
	411 const int INIT_ST = 0;
	nasko 2013/08/09 19:07:28 nit: I'd use less abbreviation in these, but it is nit: I'd use less abbreviation in these, but it is up to you.
	412 const int LBRACE_ST = 1;

	413 const int LQUOTE_ST = 2;

	414 const int COLON_ST = 3;

	415 const int DEAD_ST = 4;

	416

	417 int state = INIT_ST;

	418 for (size_t i = 0; i < length && state < COLON_ST; ++i, ++data) {

	419 const char c = *data;

	420 if (c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n')

	421 continue;

	422

	423 switch (state) {

	424 case INIT_ST:

	425 if (c == '{')

	426 state = LBRACE_ST;

	427 else

	428 state = DEAD_ST;

	429 break;

	430 case LBRACE_ST:

	431 if (c == '\"' \|\| c == '\'')

	432 state = LQUOTE_ST;

	433 else

	434 state = DEAD_ST;

	435 break;

	436 case LQUOTE_ST:

	437 if (c == ':') {
	Charlie Reis 2013/08/09 18:48:38 nit: No braces needed. nit: No braces needed. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: No braces needed. Done.
	438 state = COLON_ST;

	439 }

	440 break;

	441 default:
	Charlie Reis 2013/08/09 18:48:38 NOTREACHED() NOTREACHED() dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > NOTREACHED() Done.
	442 break;

	443 }

	444 }

	445 return state == COLON_ST;

	446 }

	447

	448 bool SiteIsolationPolicy::DoSignatureMatching(const char* data,
	Charlie Reis 2013/08/09 18:48:38 MatchesSignature might be a better name. MatchesSignature might be a better name.
	449 size_t length,

	450 const char* signatures[],

	451 size_t arr_size) {

	452 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {

	453 const char* signature = signatures[sig_index];

	454 size_t signature_length = strlen(signature);

	455 size_t i = 0;

	456 // Skip the white characters at the beginning of the document.

	457 for (i = 0; i < length; ++i) {

	458 char c = *data;

	459 if (!(c == ' ' \|\| c == '\r' \|\| c == '\n' \|\| c == '\t')) {
	Charlie Reis 2013/08/09 18:48:38 nit: No braces needed on one-line clause. nit: No braces needed on one-line clause. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: No braces needed on one-line clause. Done.
	460 break;

	461 }

	462 ++data;
	nasko 2013/08/09 19:07:28 Why are you modifying the input parameter? You als Why are you modifying the input parameter? You also don't reset it across outside for loop iterations, so this seems wrong on first glance. dsjang 2013/08/12 22:56:17 Thanks a lot!! Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > Why are you modifying the input parameter? You also don't reset it across > outside for loop iterations, so this seems wrong on first glance. Thanks a lot!! Done.
	463 }

	464 length = length - i;

	465 if (length < signature_length)
	nasko 2013/08/09 19:07:28 Is there a reason why we can't put this before we Is there a reason why we can't put this before we start comparing? If we don't have enough data from the wire to compare to the current signature, we shouldn't even try it, right? dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 19:07:28, nasko wrote: > Is there a reason why we can't put this before we start comparing? If we don't > have enough data from the wire to compare to the current signature, we shouldn't > even try it, right? Done.
	466 continue;

	467 if (!base::strncasecmp(signature, data, signature_length)) {

	468 return true;

	469 }

	470 }

	471 return false;

	472 }

	473

	474 bool SiteIsolationPolicy::IsErrorStatusCode(int status_code) {
	Charlie Reis 2013/08/09 18:48:38 The only place you use this is when deciding if it The only place you use this is when deciding if it's renderable, so it seems strange to invert the result. Perhaps this should be IsRenderableStatusCode instead of IsErrorStatusCode. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > The only place you use this is when deciding if it's renderable, so it seems > strange to invert the result. Perhaps this should be IsRenderableStatusCode > instead of IsErrorStatusCode. Done.
	475 // Chrome only uses the content of a response with one of these

	476 // status codes for CSS/JavaScript. For images, Chrome just ignores

	477 // status code.

	478 const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302,

	479 303, 305, 306, 307};

	480 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {

	481 if (renderable_status_code[i] == status_code)

	482 return false;

	483 }

	484 return true;

	485 }

	486

	487 bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) {

	488 // TODO(dsjang): This is a real hacking. The only purpose of this
	Charlie Reis 2013/08/09 18:48:38 hacking -> hack hacking -> hack dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > hacking -> hack Done.
	489 // function is to try to see if there's any possibility that this

	490 // data can be JavaScript.(superset of JS). This function will be
	Charlie Reis 2013/08/09 18:48:38 nit: space, not period, after JavaScript nit: space, not period, after JavaScript dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: space, not period, after JavaScript Done.
	491 // removed for the production code.
	Charlie Reis 2013/08/09 18:48:38 will be removed once UMA stats are gathered. will be removed once UMA stats are gathered. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > will be removed once UMA stats are gathered. Done.
	492

	493 // Search for "var " for JS detection. :-)

	494 for (size_t i = 0; i < length - 3; ++i) {

	495 if (strncmp(data, "var ", 4) == 0) {
	Charlie Reis 2013/08/09 18:48:38 nit: No brace. nit: No brace. dsjang 2013/08/12 22:56:17 Done. Show quoted text On 2013/08/09 18:48:38, creis wrote: > nit: No brace. Done.
	496 return true;

	497 }

	498 ++data;

	499 }

	500 return false;

	501 }

	502

	503 } // namespace webkit_glue

OLD	NEW