webkit/child/site_isolation_policy.cc - Issue 22254005: UMA data collector for cross-site documents(XSD)

Side by Side Diff: webkit/child/site_isolation_policy.cc

Issue 22254005: UMA data collector for cross-site documents(XSD) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr

Patch Set: Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "webkit/child/site_isolation_policy.h"

	6

	7 #include "base/basictypes.h"

	8 #include "base/logging.h"

	9 #include "base/metrics/histogram.h"

	10 #include "base/strings/string_util.h"

	11 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"

	12 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"

	13 #include "third_party/WebKit/public/platform/WebString.h"

	14 #include "third_party/WebKit/public/platform/WebURL.h"

	15 #include "third_party/WebKit/public/platform/WebURLRequest.h"

	16 #include "third_party/WebKit/public/platform/WebURLResponse.h"

	17 #include "third_party/WebKit/public/web/WebDocument.h"

	18 #include "third_party/WebKit/public/web/WebFrame.h"

	19 #include "third_party/WebKit/public/web/WebFrameClient.h"

	20 #include "third_party/WebKit/public/web/WebSecurityOrigin.h"

	21

	22 using WebKit::WebURLResponse;

	23 using WebKit::WebURLRequest;

	24 using WebKit::WebURL;

	25 using WebKit::WebString;

	26 using WebKit::WebDocument;

	27

	28 namespace webkit_glue {

	29

	// Out-of-class definitions of SiteIsolationPolicy's three static maps:
	//  - id_target_map_: request identifier -> target type, written by
	//    WillSendRequest() and consumed by DidReceiveResponse().
	//  - url_responsedata_map_: response URL spec -> ResponseMetaData, written
	//    by DidReceiveResponse() and consumed by DidReceiveData().
	//  - id_url_map_: request identifier -> response URL spec, used by
	//    DidFinishResourceLoad(unsigned) to find the url_responsedata_map_ entry
	//    that belongs to a finished request.
	30 std::map<unsigned, WebURLRequest::TargetType>
	nasko 2013/08/06 17:29:27 Why are these needed in this file? They are alread Why are these needed in this file? They are already in the header as private variables. dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > Why are these needed in this file? They are already in the header as private > variables. Done.
	31 SiteIsolationPolicy::id_target_map_;

	32 std::map<std::string, ResponseMetaData>

	33 SiteIsolationPolicy::url_responsedata_map_;

	34 std::map<unsigned, std::string> SiteIsolationPolicy::id_url_map_;

	35

	36 void SiteIsolationPolicy::WillSendRequest(

	37 unsigned identifier,

	38 WebURLRequest::TargetType target_type) {

	39 // This happens when the original request is redirected.

	40 if (id_target_map_.count(identifier) != 0) {

	41 // This check actually can fail. If it is, which target_type do we

	42 // have to record between the old one and the new one? When

	43 // redirection happens, target_type becomes 2. TODO(dsjang):

	44 // let's disable this code and see what happens on onclickads.com

	45 // for googleads JavaScript code assigned to an image.

	46 if (id_target_map_[identifier] != target_type) {

	47 id_target_map_[identifier] = target_type;

	48 }

	49 }

	50 id_target_map_[identifier] = target_type;

	51 }

	52

	// Inspects the response for request |identifier| received by |frame| and,
	// if it passes every filter below, stores its metadata for DidReceiveData().
	//
	// Steps, in order:
	//   1. DCHECKs that |identifier| was recorded in id_target_map_, counts the
	//      response in the "XSDP.ALL" histogram, and removes the recorded
	//      target type from id_target_map_.
	//   2. Returns without storing anything when IsFrameNotCommitted(frame) is
	//      true, when the frame origin passes IsSafeScheme(), when the frame
	//      origin and the response URL pass IsSameSite(), when the MIME type
	//      maps to ResponseMetaData::IsOthers, or when the
	//      Access-Control-Allow-Origin header passes IsValidCorsHeaderSet().
	//   3. Otherwise fills a ResponseMetaData, stores it in
	//      url_responsedata_map_ keyed by the response URL spec, and records
	//      that URL in id_url_map_ keyed by |identifier|.
	53 void SiteIsolationPolicy::DidReceiveResponse(WebKit::WebFrame* frame,

	54 unsigned identifier,

	55 const WebURLResponse& response) {

	56
	nasko 2013/08/06 17:29:27 nit: no need for empty line here. nit: no need for empty line here. dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > nit: no need for empty line here. Done.
	57 DCHECK(id_target_map_.count(identifier) == 1);

	58

	59 UMA_HISTOGRAM_COUNTS("XSDP.ALL", 1);

	60

	61 GURL response_url = response.url();

	62 WebURLRequest::TargetType target_type = id_target_map_[identifier];

	63 id_target_map_.erase(identifier);

	64

	65 // See if this is for navigation. If it is, let it pass.

	66 if (IsFrameNotCommitted(frame)) {

	67 LOG(INFO) << "SiteIsolationPolicy.FrameNotCommitted";

	68 return;

	69 }

	70

	71 GURL frame_origin(frame->document().securityOrigin().toString().utf8());

	72

	73 // TODO(dsjang): Find out all non-network scheme here.

	74 // If it's the data: scheme, we can let it pass through.

	75 if (IsSafeScheme(frame_origin)) {

	76 LOG(INFO) << "SiteIsolationPolicy.SafeScheme:" << frame_origin;

	77 return;

	78 }

	79

	80 if (IsSameSite(frame_origin, response_url)) {

	81 LOG(INFO) << "SiteIsolationPolicy.SameSite:" << frame_origin << ","

	82 << response_url;

	83 return;

	84 }

	85

	86 ResponseMetaData::CanonicalMimeType canonical_mime_type =

	87 GetCanonicalMimeType(response);

	88

	89 if (canonical_mime_type == ResponseMetaData::IsOthers) {

	90 LOG(INFO) << "SiteIsolationPolicy.mimetype:" << frame_origin << ","

	91 << response_url << "," << response.mimeType().utf8();

	92 return;

	93 }

	94

	95 std::string access_control_origin = response

	96 .httpHeaderField(

	97 WebKit::WebString::fromUTF8("Access-Control-Allow-Origin")).utf8();

	98

	99 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) {

	100 LOG(INFO) << "SiteIsolationPolicy.CorsisSafe:";
	nasko 2013/08/06 17:29:27 nit: CorsIsSafe nit: CorsIsSafe dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > nit: CorsIsSafe Done.
	101 return;

	102 }

	103

	104 // Real data collection starts from here.

	105 //

	106 // XSDP.XSD.%MIMECODE is a shortened name for

	107 // XSDP.All.NNav.NSafeScheme.NSMIMEType.NSCORS.%MIMECODE from now
	nasko 2013/08/06 17:29:27 What is the meaning of this string? Putting a comm What is the meaning of this string? Putting a comment that outlines how the UMA stats are organized and their meaning will be very very useful. Without that, I'm somewhat lost in subsequent code. dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > What is the meaning of this string? Putting a comment that outlines how the UMA > stats are organized and their meaning will be very very useful. Without that, > I'm somewhat lost in subsequent code. Done.
	108 // on.

	109

	110 LOG(INFO) << "SiteIsolationPolicy.XSD!!!:" << response_url;

	111

	112 ResponseMetaData metaData;

	113 metaData.frame_origin = frame_origin.spec();

	114 metaData.response_url = response_url.spec();

	115 metaData.identifier = identifier;

	116 metaData.target_type = target_type;

	117 metaData.canonical_mime_type = canonical_mime_type;

	118 metaData.http_status_code = response.httpStatusCode();

	119

	// Two indexes are kept: URL -> metadata for DidReceiveData(), and
	// identifier -> URL for DidFinishResourceLoad(unsigned).
	120 url_responsedata_map_[metaData.response_url] = metaData;

	121 id_url_map_[identifier] = metaData.response_url;

	122

	123 return;

	124 }

	125

	// Classifies the data of a response previously stored by
	// DidReceiveResponse() and records UMA counts about it.
	//
	// |data|, |length|   - bytes handed to the SniffFor*() helpers; |length| is
	//                      also recorded in "XSDP.XSD.DataLength".
	// |web_response_url| - URL whose spec is the lookup key into
	//                      url_responsedata_map_.
	//
	// Returns immediately when no metadata is stored for the URL. Otherwise the
	// entry is copied and erased from url_responsedata_map_, so a later call
	// for the same URL returns early unless a new entry has been stored.
	//
	// The histogram name is built up incrementally in uma_bucket_name:
	// "XSDP.XSD." + MIME category, then a ".Verified..." suffix when sniffing
	// confirms the category, then either ".UnaffectedStatusCode.<code>" or
	// ".NUnaffectedStatusCode.<target type>" (optionally ".MaybeJS"). When
	// sniffing fails, only ".NVerified.MaybeJS" may be appended.
	//
	// NOTE(review): uma_bucket_name varies at runtime while each
	// UMA_HISTOGRAM_COUNTS call site is fixed — confirm that the macro supports
	// a non-constant name at a single call site.
	126 void SiteIsolationPolicy::DidReceiveData(const char* data,

	127 int length,

	128 WebURL& web_response_url) {

	129 GURL response_url(web_response_url);

	130

	131 std::string response_url_str = response_url.spec();

	132 if (url_responsedata_map_.count(response_url_str) == 0)
	nasko 2013/08/06 17:29:27 Is this a valid case? When will we have seen a req Is this a valid case? When will we have seen a request, but receive no response? dsjang 2013/08/07 00:19:07 url_responsedata_map_ only maintains url for cross url_responsedata_map_ only maintains url for cross-site responses detected by DidReceiveResponse(). Therefore, if the url for received data does not exist in url_responsedata_map_, it means that this data is not for a cross-site response. So we skip the check. On 2013/08/06 17:29:27, nasko wrote: Show quoted text > Is this a valid case? When will we have seen a request, but receive no response?
	133 return;

	134

	135 // Record the length of the first received network packet to see if

	136 // it's enough for sniffing.

	137 UMA_HISTOGRAM_COUNTS("XSDP.XSD.DataLength", length);

	138

	139 DCHECK(url_responsedata_map_.count(response_url_str) == 1);

	140 ResponseMetaData metaData = url_responsedata_map_[response_url_str];

	141 url_responsedata_map_.erase(response_url_str);

	142

	143 std::string uma_bucket_name("XSDP.XSD.");

	144 uma_bucket_name.append(ResponseMetaData::CanonicalMimeTypeToString(

	145 metaData.canonical_mime_type));

	146 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	147

	148 // TODO(dsjang): sometimes the length of payload can be not enough to do

	149 // correct content sniffing. If that happens, put it into a buffer

	150 // so that we can do it later.

	151 bool verified_for_blocking = false;

	152 switch (metaData.canonical_mime_type) {

	153 case ResponseMetaData::IsHTML:

	154 if (SniffForHTML(data, length)) {

	155 uma_bucket_name.append(".Verified");

	156 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	157 verified_for_blocking = true;

	158 }

	159 break;

	160 case ResponseMetaData::IsXML:

	161 if (SniffForXML(data, length)) {

	162 uma_bucket_name.append(".Verified");

	163 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	164 verified_for_blocking = true;

	165 }

	166 break;

	167 case ResponseMetaData::IsJSON:

	168 if (SniffForJSON(data, length)) {

	169 uma_bucket_name.append(".Verified");

	170 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	171 verified_for_blocking = true;

	172 }

	173 break;

	// text/plain declares no structure, so all three sniffers are tried in
	// turn and the first one that matches names the suffix.
	174 case ResponseMetaData::IsPlain:

	175 if (SniffForHTML(data, length)) {

	176 uma_bucket_name.append(".Verified.HTML");

	177 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	178 verified_for_blocking = true;

	179 } else if (SniffForXML(data, length)) {

	180 uma_bucket_name.append(".Verified.XML");

	181 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	182 verified_for_blocking = true;

	183 } else if (SniffForJSON(data, length)) {

	184 uma_bucket_name.append(".Verified.JSON");

	185 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	186 verified_for_blocking = true;

	187 }

	188 break;

	// DidReceiveResponse() returns before storing metadata for IsOthers, so
	// this case is not expected here.
	189 case ResponseMetaData::IsOthers:

	190 DCHECK(false);

	191 break;

	192 }

	193

	194 // We block these. See how many of them have unaffected status code.

	195 if (verified_for_blocking) {

	196 if (UnaffectedStatusCode(metaData.http_status_code)) {

	197 // This is a blocking that does not affect the browser behavior

	198 // by the following reasons : 1) this is not a binary object

	199 // (such as an image) since this is sniffed as a text

	200 // document. 2) then, this blocking only breaks the renderer

	201 // behavior only if it is either JavaScript or CSS. However, the

	202 // renderer doesn't use the contents of JS/CSS with unaffected

	203 // status code(e.g, 404). *) the renderer is expected not to use

	204 // the cross-site document content for purposes other than

	205 // JS/CSS (e.g, XHR).

	206 uma_bucket_name.append(".UnaffectedStatusCode.");

	207 std::stringstream stat_code_strm;

	208 LOG(INFO) << "Blocked:UNAFFECTED STAT CODE:" << metaData.http_status_code;

	209 stat_code_strm << metaData.http_status_code;

	210 uma_bucket_name.append(stat_code_strm.str());

	211 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	212 } else {

	213 LOG(INFO) << "Blocked:AFFECTED STAT CODE:" << metaData.http_status_code;

	214 // This blocking can be disruptive if it was actually JS, and

	215 // requested for JS.

	216 uma_bucket_name.append(".NUnaffectedStatusCode.");

	217 uma_bucket_name.append(

	218 ResponseMetaData::TargetTypeToString(metaData.target_type));

	219 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	220 if (SniffForJS(data, length)) {

	221 // This shows if this blocking can be JS.

	222 uma_bucket_name.append(".MaybeJS");

	223 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	224 }

	225 }

	226 } else {

	227 LOG(INFO) << "Not Blocked:sniffing failed:";

	228 // Not blocked. How many of them can be JS? This is only useful

	229 // for studying non-blocked documents.

	230 if (SniffForJS(data, length)) {

	231 uma_bucket_name.append(".NVerified.MaybeJS");

	232 UMA_HISTOGRAM_COUNTS(uma_bucket_name.data(), 1);

	233 }

	234 }

	235 }

	236

	237 void SiteIsolationPolicy::DidFinishResourceLoad(unsigned identifier) {

	238 id_target_map_.erase(identifier);

	239 if (id_url_map_.count(identifier) > 0) {

	240 url_responsedata_map_.erase(id_url_map_[identifier]);

	241 id_url_map_.erase(identifier);

	242 }

	243 }

	244

	245 void SiteIsolationPolicy::DidFinishResourceLoad(

	246 WebKit::WebURL& web_response_url) {

	247 GURL response_url(web_response_url);

	248

	249 if (url_responsedata_map_.count(response_url.spec()) > 0) {

	250 ResponseMetaData meta_data = url_responsedata_map_[response_url.spec()];

	251 url_responsedata_map_.erase(response_url.spec());

	252 id_target_map_.erase(meta_data.identifier);

	253 id_url_map_.erase(meta_data.identifier);

	254 }

	255 }

	256

	// Maps the MIME type of |response| onto one of the coarse
	// ResponseMetaData::CanonicalMimeType categories.
	//
	// The MIME type is lowercased and compared against the 8 entries of
	// document_mime_types. The index of the first match selects the category:
	//   0    -> IsHTML   ("text/html")
	//   1..3 -> IsXML    ("text/xml", "application/rss+xml", "application/xml")
	//   4..6 -> IsJSON   ("application/json", "text/x-json", "text/json")
	//   7    -> IsPlain  ("text/plain")
	//   8    -> IsOthers (the loop ran to completion without a match)
	// The index ranges below therefore depend on the order of the array.
	257 ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType(

	258 const WebURLResponse& response) {

	259 // RFC 2045 says: "The type, subtype, and parameter names are not

	260 // case sensitive." If you have a MIME type of text/plain that's a

	261 // type of text and a subtype of plain. So, per the spec, these are

	262 // not case sensitive.

	263 std::string mime_type = response.mimeType().utf8();

	264 StringToLowerASCII(&mime_type);
	nasko 2013/08/06 17:29:27 The mime_type string is UTF8, yet you are using AS The mime_type string is UTF8, yet you are using ASCII function. This doesn't seem right. dsjang 2013/08/07 00:19:07 I found that all the crawled mime types are origin I found that all the crawled mime types are originally lowercase on the top 50k sites. And existing mime type handling code doesn't do lowercasing. I'll remove the StringToLowerASCII call entirely here. On 2013/08/06 17:29:27, nasko wrote: Show quoted text > The mime_type string is UTF8, yet you are using ASCII function. This doesn't > seem right.
	265

	266 const char* const document_mime_types[] = {

	267 "text/html", "text/xml", "application/rss+xml", "application/xml",

	268 "application/json", "text/x-json", "text/json", "text/plain"};

	269 size_t i = 0;

	// The bound 8 is the number of entries in document_mime_types.
	270 for (i = 0; i < 8; ++i) {

	271 if (!strcmp(document_mime_types[i], mime_type.data())) {
	nasko 2013/08/06 17:29:27 This is unsafe comparison, you should be bounding This is unsafe comparison, you should be bounding memory access by the size of the inputs. dsjang 2013/08/07 00:19:07 Switched to std::string::operator==(). On 2013/08 Switched to std::string::operator==(). On 2013/08/06 17:29:27, nasko wrote: Show quoted text > This is unsafe comparison, you should be bounding memory access by the size of > the inputs.
	272 break;

	273 }

	274 }

	275

	276 if (i == 0) {
	nasko 2013/08/06 17:29:27 Using constants like these seems unclean to me. Is Using constants like these seems unclean to me. Is there a better way to structure this? dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > Using constants like these seems unclean to me. Is there a better way to > structure this? Done.
	277 return ResponseMetaData::IsHTML;

	278 } else if (1 <= i && i < 4) {

	279 return ResponseMetaData::IsXML;

	280 } else if (4 <= i && i < 7) {

	281 return ResponseMetaData::IsJSON;

	282 } else if (i == 7) {

	283 return ResponseMetaData::IsPlain;

	284 } else {

	285 return ResponseMetaData::IsOthers;

	286 }

	287 }

	288

	// Returns true when the scheme of |url| is exactly "data", and false for
	// every other scheme.
	289 bool SiteIsolationPolicy::IsSafeScheme(GURL& url) {
	nasko 2013/08/06 17:29:27 What about blob URLs? Are those safe too? What about blob URLs? Are those safe too? dsjang 2013/08/07 00:19:07 Switched to blacklisting from whitelisting. On 20 Switched to blacklisting from whitelisting. On 2013/08/06 17:29:27, nasko wrote: Show quoted text > What about blob URLs? Are those safe too?
	290 return url.scheme() == "data";

	291 }

	292

	// Returns true when |frame_origin| and |response_url| belong to the same
	// site.
	//
	// Two URLs count as same-site here when their schemes are equal and
	// GetDomainAndRegistry(), called with INCLUDE_PRIVATE_REGISTRIES, yields
	// the same string for both. A scheme mismatch returns false without
	// looking at the domains.
	293 bool SiteIsolationPolicy::IsSameSite(GURL& frame_origin, GURL& response_url) {

	294 if (frame_origin.scheme() != response_url.scheme())

	295 return false;

	296

	297 // Extract the effective domains (public suffix plus one) of the

	298 // urls.

	299 std::string frame_domain =

	300 net::registry_controlled_domains::GetDomainAndRegistry(

	301 frame_origin,

	302 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
	nasko 2013/08/06 17:29:27 Is there a reason we are deviating from how SiteIn Is there a reason we are deviating from how SiteInstance does same site checks? The code there does exclude private registries. dsjang 2013/08/07 00:19:07 I thought allowing private registries here enables I thought allowing private registries here enables sites to use a finer definition of sites. Knowing SiteIsolation is using only the public registry, I'll switch to the public one. On 2013/08/06 17:29:27, nasko wrote: Show quoted text > Is there a reason we are deviating from how SiteInstance does same site checks? > The code there does exclude private registries.
	303 std::string response_domain =

	304 net::registry_controlled_domains::GetDomainAndRegistry(

	305 response_url,

	306 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);

	307

	308 return frame_domain == response_domain;

	309 }

	310

	// Returns true when |frame| currently has a provisional data source, i.e.
	// frame->provisionalDataSource() is not NULL. DidReceiveResponse() treats
	// that state as a navigation and lets the response pass.
	311 bool SiteIsolationPolicy::IsFrameNotCommitted(WebKit::WebFrame* frame) {
	nasko 2013/08/06 17:29:27 This name is a bit confusing, can you put a descri This name is a bit confusing, can you put a description for it? dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > This name is a bit confusing, can you put a description for it? Done.
	312 return frame->provisionalDataSource() != NULL;

	313 }

	314

	// Decides whether an Access-Control-Allow-Origin header value lets
	// |frame_origin| receive the response.
	//
	// |frame_origin|          - origin of the requesting frame.
	// |website_origin|        - only its scheme is read; it becomes the scheme
	//                           of a header value that starts with neither
	//                           "http://" nor "https://".
	// |access_control_origin| - header value; passed by value and edited
	//                           locally.
	//
	// Evaluation order:
	//   1. An empty value returns false.
	//   2. A value longer than two characters that is wrapped in matching
	//      double or single quotes has the quotes removed.
	//   3. The value "*" returns true.
	//   4. A missing http/https scheme is filled in from |website_origin|.
	//   5. The value is converted through WebSecurityOrigin into a GURL and the
	//      result is IsSameSite(frame_origin, <that GURL>).
	//
	// NOTE(review): both LOG(ERROR) statements look like debugging output —
	// confirm whether they are meant to stay.
	315 bool SiteIsolationPolicy::IsValidCorsHeaderSet(

	316 GURL& frame_origin,

	317 GURL& website_origin,

	318 std::string access_control_origin) {

	319

	320 size_t access_control_origin_len = access_control_origin.size();

	321

	322 // TODO(dsjang): Is this actually true? The server seems to return

	323 // an empty string or "null".

	324 if (access_control_origin_len == 0)

	325 return false;

	326

	327 // Strip quotes off from it. This is non-standard practice, but many
	nasko 2013/08/06 17:29:27 Don't we have code that parses the header that we Don't we have code that parses the header that we can reuse? I'm always worried about duplicating parsing logic. dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > Don't we have code that parses the header that we can reuse? I'm always worried > about duplicating parsing logic. Done.
	328 // websites use quote strings surrounding the actual header value.

	329 if (access_control_origin_len > 2) {

	330 char first = access_control_origin[0];

	331 char last = access_control_origin[access_control_origin_len - 1];

	332 if ((first == '\"' && last == '\"') \|\| (first == '\'' && last == '\'')) {

	333 access_control_origin =

	334 access_control_origin.substr(1, access_control_origin_len - 2);

	335 }

	336 }

	337

	338 // TODO(dsjang): * is not allowed for the response from a request

	339 // with cookies. This allows for more than what the renderer will

	340 // eventually be able to receive, so we won't see illegal cross-site

	341 // documents allowed by this. We have to have a way to see if

	342 // this response is from a cookie-tagged request or not in the

	343 // future.

	344 if (access_control_origin == "*")
	nasko 2013/08/06 17:29:27 You talk about cookie-tagged requests, but there i You talk about cookie-tagged requests, but there is no such check. Am I missing something? dsjang 2013/08/07 00:19:07 Yes, it has to be incorporated here in the future, Yes, it has to be incorporated here in the future, but we allow for them to pass here even thought they can be blocked by the actual CORS policy. On 2013/08/06 17:29:27, nasko wrote: Show quoted text > You talk about cookie-tagged requests, but there is no such check. Am I missing > something?
	345 return true;

	346

	347 // TODO(dsjang): The CORS spec only treats a fully specified URL,

	348 // not just a domain here. Confirm this ad-hoc rule to be

	349 // correct. If this doesn't start with a scheme(http://, https://),
	nasko 2013/08/06 17:29:27 This sounds scary. Is this correct? This sounds scary. Is this correct? dsjang 2013/08/07 00:19:07 This was actually scarily wrong :-). I found out t This was actually scarily wrong :-). I found out the Blink's CORS implemtation doesn't allow it. This code will be gone too. On 2013/08/06 17:29:27, nasko wrote: Show quoted text > This sounds scary. Is this correct?
	350 // it inherits the site's scheme.

	351 if (access_control_origin.find("http://") != 0 &&

	352 access_control_origin.find("https://") != 0) {

	353 access_control_origin.insert(0, website_origin.scheme() + "://");

	354 }

	355

	356 LOG(ERROR) << access_control_origin;

	357

	358 // We don't use Webkit's

	359 // frame->securityOrigin().canAccess(WebSecurityOrigin::createFromString(acc

	360 // ess_control_origin)))here since their .canAccess works in terms of origins,

	361 // not sites. For example, when frame is sub.a.com and it is not allowed

	362 // to access a document with sub1.a.com. But under Site Isolation,

	363 // it's allowed.

	364

	365 // TODO(dsjang): examine createFromString()'s behavior for a URL

	366 // containing * in it.

	367 WebKit::WebSecurityOrigin cors_security_origin =

	368 WebKit::WebSecurityOrigin::createFromString(

	369 WebKit::WebString::fromUTF8(access_control_origin));

	370 GURL cors_origin(cors_security_origin.toString().utf8());

	371

	372 LOG(ERROR) << cors_security_origin.toString().utf8();

	373 return IsSameSite(frame_origin, cors_origin);

	374 }

	375

	376 bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) {

	377 // TODO(dsjang): The content sniffer used by Chrome and Firefox are

	378 // using "<!--" as one of the HTML signatures, but it also appears

	379 // in valid JavaScript, considered as well-formed JS by the browser.

	380 // Since we do not want to block any JS, we exclude it from our HTML

	381 // signatures. This can weaken our document block policy, but we can

	382 // break less websites.

	383 const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec

	384 "<script", // HTML5 spec, Mozilla

	385 "<html", // HTML5 spec, Mozilla

	386 "<head", // HTML5 spec, Mozilla

	387 "<iframe", // Mozilla

	388 "<h1", // Mozilla

	389 "<div", // Mozilla

	390 "<font", // Mozilla

	391 "<table", // Mozilla

	392 "<a", // Mozilla

	393 "<style", // Mozilla

	394 "<title", // Mozilla

	395 "<b", // Mozilla

	396 "<body", // Mozilla

	397 "<br", "<p" // Mozilla

	398 };

	399 return DoSignatureMatching(

	400 data, length, html_signatures, arraysize(html_signatures));

	401 }

	402

	403 bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) {

	404 const char* xml_signatures[] = {"<?xml" // Mozilla

	405 };

	406 return DoSignatureMatching(

	407 data, length, xml_signatures, arraysize(xml_signatures));

	408 }

	409

	// Heuristically checks whether |data| starts like a JSON object.
	//
	// Scans at most |length| bytes, ignoring spaces, tabs, CRs and LFs, and
	// drives a small state machine with the remaining characters:
	//   0 - nothing seen yet; '{' moves to 1, anything else moves to 4.
	//   1 - '{' seen; a double or single quote moves to 2, anything else to 4.
	//   2 - quote seen; stays here until a ':' moves to 3.
	//   3 - match.
	//   4 - mismatch.
	// Reaching state 3 or 4 ends the loop. Returns true only for state 3.
	410 bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) {

	411 // TODO(dsjang): We have to come up with a better way to sniff

	412 // JSON. However, even RE cannot help us that much due to the fact

	413 // that we don't do full parsing. This DFA finds 1) {, 2) "(or'),

	414 // 3) : in the order. This is intentionally not using a regular

	415 // expression library so that we can make the trusted code base as

	416 // small as possible.

	417 int state = 0;

	418 for (size_t i = 0; i < length && state < 3; ++i, ++data) {

	419 char c = *data;

	420 if (c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n')

	421 continue;

	422

	423 switch (state) {

	424 case 0:

	425 if (c == '{')

	426 state = 1;
	nasko 2013/08/06 17:29:27 Please use symbolic names or describe what those n Please use symbolic names or describe what those numbers mean. dsjang 2013/08/07 00:19:07 Done. Show quoted text On 2013/08/06 17:29:27, nasko wrote: > Please use symbolic names or describe what those numbers mean. Done.
	427 else

	428 state = 4;

	429 break;

	430 case 1:

	431 if (c == '\"' \|\| c == '\'')

	432 state = 2;

	433 else

	434 state = 4;

	435 break;

	436 case 2:

	437 if (c == ':') {

	438 state = 3;

	439 }

	440 break;

	441 default:

	442 break;

	443 }

	444 }

	445 return state == 3;

	446 }

	447

	448 bool SiteIsolationPolicy::DoSignatureMatching(const char* data,

	449 size_t length,

	450 const char* signatures[],

	451 size_t arr_size) {

	452 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {

	453 const char* signature = signatures[sig_index];

	454 size_t signature_length = strlen(signature);

	455 size_t i = 0;

	456 // Skip the white characters at the beginning of the document.

	457 for (i = 0; i < length; ++i) {

	458 char c = *data;

	459 if (!(c == ' ' \|\| c == '\r' \|\| c == '\n' \|\| c == '\t')) {

	460 break;

	461 }

	462 ++data;

	463 }

	464 length = length - i;

	465 if (length < signature_length)

	466 continue;

	467 if (base::strncasecmp(signature, data, signature_length) == 0) {

	468 return true;

	469 }

	470 }

	471 return false;

	472 }

	473

	474 bool SiteIsolationPolicy::UnaffectedStatusCode(int status_code) {

	475 // Chrome only uses the content of a response with one of these

	476 // status codes for CSS/JavaScript. For images, Chrome just ignores

	477 // status code.

	478 const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302,

	479 303, 305, 306, 307};

	480 for (size_t i = 0; i < 12; ++i) {

	481 if (renderable_status_code[i] == status_code)

	482 return false;

	483 }

	484 return true;

	485 }

	486

	487 bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) {

	488 // TODO(dsjang): This is a real hacking. The only purpose of this

	489 // function is to try to see if there's any possibility that this

	490 // data can be JavaScript.(superset of JS). This function will be

	491 // removed for the production code.

	492

	493 // Search for "var " for JS detection. :-)

	494 for (size_t i = 0; i < length - 3; ++i) {

	495 if (strncmp(data, "var ", 4) == 0) {

	496 return true;

	497 }

	498 ++data;

	499 }

	500 return false;

	501 }

	502 }
	nasko 2013/08/06 17:29:27 Leave an empty line and put a // namespace comment.
OLD	NEW

« webkit/child/site_isolation_policy.h ('K') | « webkit/child/site_isolation_policy.h ('k') | webkit/child/site_isolation_policy_unittest.cc » ('j') | no next file with comments »