Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "content/child/site_isolation_policy.h" | |
| 6 | |
| 7 #include "base/basictypes.h" | |
| 8 #include "base/command_line.h" | |
| 9 #include "base/logging.h" | |
| 10 #include "base/metrics/histogram.h" | |
| 11 #include "base/strings/string_util.h" | |
| 12 #include "content/public/common/content_switches.h" | |
| 13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
| 14 #include "net/http/http_response_headers.h" | |
| 15 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h" | |
| 16 #include "third_party/WebKit/public/platform/WebString.h" | |
| 17 #include "third_party/WebKit/public/platform/WebURL.h" | |
| 18 #include "third_party/WebKit/public/platform/WebURLRequest.h" | |
| 19 #include "third_party/WebKit/public/platform/WebURLResponse.h" | |
| 20 #include "third_party/WebKit/public/web/WebDocument.h" | |
| 21 #include "third_party/WebKit/public/web/WebFrame.h" | |
| 22 #include "third_party/WebKit/public/web/WebFrameClient.h" | |
| 23 #include "third_party/WebKit/public/web/WebSecurityOrigin.h" | |
| 24 | |
| 25 using WebKit::WebDocument; | |
| 26 using WebKit::WebString; | |
| 27 using WebKit::WebURL; | |
| 28 using WebKit::WebURLResponse; | |
| 29 using WebKit::WebURLRequest; | |
| 30 | |
| 31 namespace content { | |
| 32 | |
namespace {

// MIME type strings that GetCanonicalMimeType() matches against (via
// LowerCaseEqualsASCII()) to classify a response as HTML, XML, JSON or plain
// text.
const char kTextHtml[] = "text/html";
const char kTextXml[] = "text/xml";
// NOTE(review): this constant does not follow the k-prefix naming used by its
// siblings. Renaming it to kAppRssXml also requires updating its use in
// GetCanonicalMimeType().
const char xAppRssXml[] = "application/rss+xml";
const char kAppXml[] = "application/xml";
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";
const char kTextPlain[] = "text/plain";

}  // namespace
|
awong
2013/08/26 20:31:41
nit:
} // namespace
| |
| 46 | |
| 47 SiteIsolationPolicy::ResponseMetaData::ResponseMetaData() {} | |
| 48 | |
| 49 void SiteIsolationPolicy::OnReceivedResponse( | |
| 50 int request_id, | |
| 51 GURL& frame_origin, | |
|
awong
2013/08/26 20:31:41
non-const refs are not allowed in function signatu
| |
| 52 GURL& response_url, | |
| 53 ResourceType::Type resource_type, | |
| 54 const webkit_glue::ResourceResponseInfo& info) { | |
| 55 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); | |
| 56 | |
| 57 // See if this is for navigation. If it is, don't block it, under the | |
| 58 // assumption that we will put it in an appropriate process. | |
| 59 if (ResourceType::IsFrame(resource_type)) | |
| 60 return; | |
| 61 | |
| 62 if (!IsBlockableScheme(response_url)) | |
| 63 return; | |
| 64 | |
| 65 if (IsSameSite(frame_origin, response_url)) | |
| 66 return; | |
| 67 | |
| 68 SiteIsolationPolicy::ResponseMetaData::CanonicalMimeType canonical_mime_type = | |
| 69 GetCanonicalMimeType(info.mime_type); | |
| 70 | |
| 71 if (canonical_mime_type == SiteIsolationPolicy::ResponseMetaData::Others) | |
| 72 return; | |
| 73 | |
| 74 // Every CORS request should have the Access-Control-Allow-Origin header even | |
| 75 // if it is preceded by a pre-flight request. Therefore, if this is a CORS | |
| 76 // request, it has this header. response.httpHeaderField() internally uses | |
| 77 // case-insensitive matching for the header name. | |
| 78 std::string access_control_origin; | |
| 79 | |
| 80 // We can use a case-insensitive header name for EnumerateHeader(). | |
| 81 info.headers->EnumerateHeader( | |
| 82 NULL, "access-control-allow-origin", &access_control_origin); | |
| 83 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) | |
| 84 return; | |
| 85 | |
| 86 // Real XSD data collection starts from here. | |
| 87 std::string no_sniff; | |
| 88 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); | |
| 89 | |
| 90 ResponseMetaData resp_data; | |
| 91 resp_data.frame_origin = frame_origin.spec(); | |
| 92 resp_data.response_url = response_url; | |
| 93 resp_data.resource_type = resource_type; | |
| 94 resp_data.canonical_mime_type = canonical_mime_type; | |
| 95 resp_data.http_status_code = info.headers->response_code(); | |
| 96 resp_data.no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff"); | |
| 97 | |
| 98 RequestIdToMetaDataMap* metadata_map = GetRequestIdToMetaDataMap(); | |
| 99 (*metadata_map)[request_id] = resp_data; | |
| 100 } | |
| 101 | |
// Helper macros for ShouldBlockResponse(). The same recording logic is needed
// for every MIME type, but it cannot be moved into a helper function because
// the UMA_HISTOGRAM_* macros do not accept variables as their bucket names.
// Macros are used instead to capture the repeated pattern.
//
// Each macro expects the following locals to be in scope at the call site:
// |result|, |resp_data|, |renderable_status_code| and, for the NOTBLOCK
// variants, |is_sniffed_for_js|. Each macro is wrapped in do { } while (0) so
// that it behaves like a single statement (safe inside an unbraced if/else)
// and must be followed by a semicolon.
//
// NOTE(review): |resp_data.resource_type| is recorded against a boundary of
// WebURLRequest::TargetIsUnspecified + 1; confirm that the two enums cover
// the same range.
//
// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.

// Records a response blocked because the sniffer confirmed its content type,
// and marks |result| as blocked.
#define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".Blocked", 1); \
    result = true; \
    if (renderable_status_code) { \
      UMA_HISTOGRAM_ENUMERATION( \
          BUCKET_PREFIX ".Blocked.RenderableStatusCode", \
          resp_data.resource_type, \
          WebURLRequest::TargetIsUnspecified + 1); \
    } else { \
      UMA_HISTOGRAM_COUNTS( \
          BUCKET_PREFIX ".Blocked.NonRenderableStatusCode", 1); \
    } \
  } while (0)

// Records a response blocked only because of its nosniff header, and marks
// |result| as blocked.
#define SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NoSniffBlocked", 1); \
    result = true; \
    if (renderable_status_code) { \
      UMA_HISTOGRAM_ENUMERATION( \
          BUCKET_PREFIX ".NoSniffBlocked.RenderableStatusCode", \
          resp_data.resource_type, \
          WebURLRequest::TargetIsUnspecified + 1); \
    } else { \
      UMA_HISTOGRAM_ENUMERATION( \
          BUCKET_PREFIX ".NoSniffBlocked.NonRenderableStatusCode", \
          resp_data.resource_type, \
          WebURLRequest::TargetIsUnspecified + 1); \
    } \
  } while (0)

// Records a response that is let through; |result| is left untouched.
#define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NotBlocked", 1); \
    if (is_sniffed_for_js) { \
      UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NotBlocked.MaybeJS", 1); \
    } \
  } while (0)

// Blocks when SNIFF_EXPR is true; otherwise blocks only when the response
// carried the nosniff header.
#define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR, BUCKET_PREFIX) \
  do { \
    if (SNIFF_EXPR) { \
      SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX); \
    } else if (resp_data.no_sniff) { \
      SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(BUCKET_PREFIX); \
    } else { \
      SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX); \
    } \
  } while (0)
| 152 | |
| 153 bool SiteIsolationPolicy::ShouldBlockResponse( | |
| 154 int request_id, | |
| 155 const char* data, | |
| 156 int length, | |
| 157 std::string* alternative_data) { | |
| 158 RequestIdToMetaDataMap* metadata_map = GetRequestIdToMetaDataMap(); | |
| 159 RequestIdToResultMap* result_map = GetRequestIdToResultMap(); | |
| 160 | |
| 161 // If there's an entry for |request_id| in blocked_map, this request's first | |
| 162 // data packet has already been examined. We can return the result here. | |
| 163 if (result_map->count(request_id) != 0) { | |
| 164 if ((*result_map)[request_id]) { | |
| 165 // Here, the blocking result has been set for the previous run of | |
| 166 // ShouldBlockResponse(), so we set alternative data to an empty string so | |
| 167 // that ResourceDispatcher doesn't call its peer's onReceivedData() with | |
| 168 // the alternative data. | |
| 169 alternative_data->erase(); | |
| 170 return true; | |
| 171 } | |
| 172 return false; | |
| 173 } | |
| 174 | |
| 175 // If result_map doesn't have an entry for |request_id|, we're receiving the | |
| 176 // first data packet for request_id. If request_id is not registered, this | |
| 177 // request is identified as a non-target of our policy. So we return true. | |
| 178 if (metadata_map->count(request_id) == 0) { | |
| 179 // We set request_id to true so that we always return true for this request. | |
| 180 (*result_map)[request_id] = false; | |
| 181 return false; | |
| 182 } | |
| 183 | |
| 184 // We now look at the first data packet received for request_id. | |
| 185 ResponseMetaData resp_data = (*metadata_map)[request_id]; | |
| 186 metadata_map->erase(request_id); | |
| 187 | |
| 188 // Record the length of the first received network packet to see if it's | |
| 189 // enough for sniffing. | |
| 190 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length); | |
| 191 | |
| 192 // Record the number of cross-site document responses with a specific mime | |
| 193 // type (text/html, text/xml, etc). | |
| 194 UMA_HISTOGRAM_ENUMERATION( | |
| 195 "SiteIsolation.XSD.MimeType", | |
| 196 resp_data.canonical_mime_type, | |
| 197 SiteIsolationPolicy::ResponseMetaData::MaxCanonicalMimeType); | |
| 198 | |
| 199 // Store the result of cross-site document blocking analysis. True means we | |
| 200 // can return this document to the renderer, false means that we have to block | |
| 201 // the response data. | |
| 202 bool result = false; | |
| 203 | |
| 204 // The content is blocked if it is sniffed for HTML/JSON/XML. When the blocked | |
| 205 // response is with an error status code, it is not disruptive by the | |
| 206 // following reasons : 1) the blocked content is not a binary object (such as | |
| 207 // an image) since it is sniffed for text; 2) then, this blocking only breaks | |
| 208 // the renderer behavior only if it is either JavaScript or CSS. However, the | |
| 209 // renderer doesn't use the contents of JS/CSS with unaffected status code | |
| 210 // (e.g, 404). 3) the renderer is expected not to use the cross-site document | |
| 211 // content for purposes other than JS/CSS (e.g, XHR). | |
| 212 bool renderable_status_code = IsRenderableStatusCodeForDocument( | |
| 213 resp_data.http_status_code); | |
| 214 | |
| 215 // This is only used for false-negative analysis for non-blocked resources. | |
| 216 bool is_sniffed_for_js = SniffForJS(data, length); | |
| 217 | |
| 218 // Record the number of responses whose content is sniffed for what its mime | |
| 219 // type claims it to be. For example, we apply a HTML sniffer for a document | |
| 220 // tagged with text/html here. Whenever this check becomes true, we'll block | |
| 221 // the response. | |
| 222 switch (resp_data.canonical_mime_type) { | |
| 223 case SiteIsolationPolicy::ResponseMetaData::HTML: | |
| 224 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length), | |
| 225 "SiteIsolation.XSD.HTML"); | |
| 226 break; | |
| 227 case SiteIsolationPolicy::ResponseMetaData::XML: | |
| 228 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length), | |
| 229 "SiteIsolation.XSD.XML"); | |
| 230 break; | |
| 231 case SiteIsolationPolicy::ResponseMetaData::JSON: | |
| 232 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length), | |
| 233 "SiteIsolation.XSD.JSON"); | |
| 234 break; | |
| 235 case SiteIsolationPolicy::ResponseMetaData::Plain: | |
| 236 if (SniffForHTML(data, length)) { | |
| 237 SITE_ISOLATION_POLICY_COUNT_BLOCK( | |
| 238 "SiteIsolation.XSD.Plain.HTML"); | |
| 239 } else if (SniffForXML(data, length)) { | |
| 240 SITE_ISOLATION_POLICY_COUNT_BLOCK( | |
| 241 "SiteIsolation.XSD.Plain.XML"); | |
| 242 } else if (SniffForJSON(data, length)) { | |
| 243 SITE_ISOLATION_POLICY_COUNT_BLOCK( | |
| 244 "SiteIsolation.XSD.Plain.JSON"); | |
| 245 } else if (is_sniffed_for_js) { | |
| 246 if (resp_data.no_sniff) { | |
| 247 SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK( | |
| 248 "SiteIsolation.XSD.Plain"); | |
| 249 } else { | |
| 250 SITE_ISOLATION_POLICY_COUNT_NOTBLOCK( | |
| 251 "SiteIsolation.XSD.Plain"); | |
| 252 } | |
| 253 } | |
| 254 break; | |
| 255 default : | |
| 256 NOTREACHED() << | |
| 257 "Not a blockable mime type. This mime type shouldn't reach here."; | |
|
awong
2013/08/26 20:31:41
Also output the mime_type.
NOTREACHED() << "Not a
| |
| 258 break; | |
| 259 } | |
| 260 | |
| 261 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); | |
|
awong
2013/08/26 20:31:41
Might as well keep this a pointer so it doesn't lo
| |
| 262 if (!command_line.HasSwitch(switches::kBlockCrossSiteDocuments)) | |
| 263 result = false; | |
| 264 (*result_map)[request_id] = result; | |
| 265 | |
| 266 if (result) { | |
| 267 alternative_data->erase(); | |
| 268 alternative_data->insert(0, " "); | |
| 269 LOG(ERROR) << resp_data.response_url | |
| 270 << " is blocked as an illegal cross-site document from " | |
| 271 << resp_data.frame_origin; | |
| 272 | |
| 273 } | |
| 274 return result; | |
| 275 } | |
| 276 | |
| 277 #undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK | |
|
awong
2013/08/26 20:31:41
How bad does this look if you expand out the macro
| |
#undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT
#undef SITE_ISOLATION_POLICY_COUNT_BLOCK
// The no-sniff variant is defined alongside the others and must not leak into
// the rest of the file either.
#undef SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK
| 280 | |
| 281 void SiteIsolationPolicy::OnRequestComplete(int request_id) { | |
| 282 RequestIdToMetaDataMap* metadata_map = GetRequestIdToMetaDataMap(); | |
| 283 RequestIdToResultMap* result_map = GetRequestIdToResultMap(); | |
| 284 metadata_map->erase(request_id); | |
| 285 result_map->erase(request_id); | |
| 286 } | |
| 287 | |
| 288 SiteIsolationPolicy::ResponseMetaData::CanonicalMimeType | |
| 289 SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) { | |
| 290 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { | |
| 291 return SiteIsolationPolicy::ResponseMetaData::HTML; | |
| 292 } | |
| 293 | |
| 294 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { | |
| 295 return SiteIsolationPolicy::ResponseMetaData::Plain; | |
| 296 } | |
| 297 | |
| 298 if (LowerCaseEqualsASCII(mime_type, kAppJson) || | |
| 299 LowerCaseEqualsASCII(mime_type, kTextJson) || | |
| 300 LowerCaseEqualsASCII(mime_type, kTextXjson)) { | |
| 301 return SiteIsolationPolicy::ResponseMetaData::JSON; | |
| 302 } | |
| 303 | |
| 304 if (LowerCaseEqualsASCII(mime_type, kTextXml) || | |
| 305 LowerCaseEqualsASCII(mime_type, xAppRssXml) || | |
| 306 LowerCaseEqualsASCII(mime_type, kAppXml)) { | |
| 307 return SiteIsolationPolicy::ResponseMetaData::XML; | |
| 308 } | |
| 309 | |
| 310 return SiteIsolationPolicy::ResponseMetaData::Others; | |
| 311 | |
| 312 } | |
| 313 | |
| 314 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { | |
| 315 // We exclude ftp:// from here. FTP doesn't provide a Content-Type | |
| 316 // header which our policy depends on, so we cannot protect any | |
| 317 // document from FTP servers. | |
| 318 return url.SchemeIs("http") || url.SchemeIs("https"); | |
| 319 } | |
| 320 | |
| 321 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, | |
| 322 const GURL& response_url) { | |
| 323 | |
| 324 if (!frame_origin.is_valid() || !response_url.is_valid()) | |
| 325 return false; | |
| 326 | |
| 327 if (frame_origin.scheme() != response_url.scheme()) | |
| 328 return false; | |
| 329 | |
| 330 // SameDomainOrHost() extracts the effective domains (public suffix plus one) | |
| 331 // from the two URLs and compare them. | |
| 332 // TODO(dsjang): use INCLUDE_PRIVATE_REGISTRIES when http://crbug.com/7988 is | |
| 333 // fixed. | |
| 334 return net::registry_controlled_domains::SameDomainOrHost( | |
| 335 frame_origin, | |
| 336 response_url, | |
| 337 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | |
| 338 } | |
| 339 | |
| 340 bool SiteIsolationPolicy::IsFrameNavigating(WebKit::WebFrame* frame) { | |
| 341 // When a navigation starts, frame->provisionalDataSource() is set | |
| 342 // to a not-null value which stands for the request made for the | |
| 343 // navigation. As soon as the network request is committed to the | |
| 344 // frame, frame->provisionalDataSource() is converted to null, and | |
| 345 // the committed data source is moved to frame->dataSource(). This | |
| 346 // is the most reliable way to detect whether the frame is in | |
| 347 // navigation or not. | |
| 348 return frame->provisionalDataSource() != NULL; | |
| 349 } | |
| 350 | |
| 351 // We don't use Webkit's existing CORS policy implementation since | |
| 352 // their policy works in terms of origins, not sites. For example, | |
| 353 // when frame is sub.a.com and it is not allowed to access a document | |
| 354 // with sub1.a.com. But under Site Isolation, it's allowed. | |
| 355 bool SiteIsolationPolicy::IsValidCorsHeaderSet( | |
| 356 GURL& frame_origin, | |
| 357 GURL& website_origin, | |
| 358 std::string access_control_origin) { | |
|
awong
2013/08/26 20:31:41
Should this be const std::string&?
| |
| 359 // Many websites are sending back "\"*\"" instead of "*". This is | |
| 360 // non-standard practice, and not supported by Chrome. Refer to | |
| 361 // CrossOriginAccessControl::passesAccessControlCheck(). | |
| 362 | |
| 363 // TODO(dsjang): * is not allowed for the response from a request | |
| 364 // with cookies. This allows for more than what the renderer will | |
| 365 // eventually be able to receive, so we won't see illegal cross-site | |
| 366 // documents allowed by this. We have to find a way to see if this | |
| 367 // response is from a cookie-tagged request or not in the future. | |
| 368 if (access_control_origin == "*") | |
| 369 return true; | |
| 370 | |
| 371 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for | |
| 372 // "*", but many websites are using just a domain for access_control_origin, | |
| 373 // and this is blocked by Webkit's CORS logic here : | |
| 374 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set | |
| 375 // is_valid() to false when it is created from a URL containing * in the | |
| 376 // domain part. | |
| 377 | |
| 378 GURL cors_origin(access_control_origin); | |
| 379 return IsSameSite(frame_origin, cors_origin); | |
| 380 } | |
| 381 | |
| 382 // This function is a slight modification of |net::SniffForHTML|. | |
| 383 bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) { | |
|
awong
2013/08/26 20:31:41
Converting all these functions to taking a StringP
| |
| 384 // The content sniffer used by Chrome and Firefox are using "<!--" | |
| 385 // as one of the HTML signatures, but it also appears in valid | |
| 386 // JavaScript, considered as well-formed JS by the browser. Since | |
| 387 // we do not want to block any JS, we exclude it from our HTML | |
| 388 // signatures. This can weaken our document block policy, but we can | |
| 389 // break less websites. | |
| 390 // TODO(dsjang): parameterize |net::SniffForHTML| with an option | |
| 391 // that decides whether to include <!-- or not, so that we can | |
| 392 // remove this function. | |
| 393 const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec | |
|
awong
2013/08/26 20:31:41
General locally scoped constants should be declare
| |
| 394 "<script", // HTML5 spec, Mozilla | |
| 395 "<html", // HTML5 spec, Mozilla | |
| 396 "<head", // HTML5 spec, Mozilla | |
| 397 "<iframe", // Mozilla | |
| 398 "<h1", // Mozilla | |
| 399 "<div", // Mozilla | |
| 400 "<font", // Mozilla | |
| 401 "<table", // Mozilla | |
| 402 "<a", // Mozilla | |
| 403 "<style", // Mozilla | |
| 404 "<title", // Mozilla | |
| 405 "<b", // Mozilla | |
| 406 "<body", // Mozilla | |
| 407 "<br", "<p", // Mozilla | |
| 408 "<?xml" // Mozilla | |
| 409 }; | |
| 410 | |
| 411 if (MatchesSignature( | |
| 412 data, length, html_signatures, arraysize(html_signatures))) | |
| 413 return true; | |
|
awong
2013/08/26 20:31:41
with a linebreak in the conditional, use braces.
| |
| 414 | |
| 415 // "<!--" is specially treated since web JS can use "<!--" "-->" pair for | |
| 416 // comments. | |
| 417 const char* comment_begins[] = {"<!--" }; | |
| 418 | |
| 419 if (MatchesSignature( | |
| 420 data, length, comment_begins, arraysize(comment_begins))) { | |
| 421 // Search for --> and do SniffForHTML after that. If we can find the | |
| 422 // comment's end, we start HTML sniffing from there again. | |
| 423 const char end_comment[] = "-->"; | |
| 424 const size_t end_comment_size = strlen(end_comment); | |
| 425 | |
| 426 for (size_t i = 0; i <= length - end_comment_size; ++i) { | |
| 427 if (!strncmp(data + i, end_comment, end_comment_size)) { | |
| 428 size_t skipped = i + end_comment_size; | |
| 429 return SniffForHTML(data + skipped, length - skipped); | |
|
awong
2013/08/26 20:31:41
This recursion is based on data from the user (the
| |
| 430 } | |
| 431 } | |
| 432 } | |
| 433 | |
| 434 return false; | |
| 435 } | |
| 436 | |
| 437 bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) { | |
| 438 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for | |
| 439 // this signature. However, XML is case-sensitive. Don't we have to | |
| 440 // be more lenient only to block documents starting with the exact | |
| 441 // string <?xml rather than <?XML ? | |
| 442 const char* xml_signatures[] = {"<?xml" // Mozilla | |
| 443 }; | |
| 444 return MatchesSignature( | |
| 445 data, length, xml_signatures, arraysize(xml_signatures)); | |
| 446 } | |
| 447 | |
| 448 bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) { | |
| 449 // TODO(dsjang): We have to come up with a better way to sniff | |
| 450 // JSON. However, even RE cannot help us that much due to the fact | |
| 451 // that we don't do full parsing. This DFA starts with state 0, and | |
| 452 // finds {, "/' and : in that order. We're avoiding adding a | |
| 453 // dependency on a regular expression library. | |
| 454 const int kInitState = 0; | |
| 455 const int kLeftBraceState = 1; | |
| 456 const int kLeftQuoteState = 2; | |
| 457 const int kColonState = 3; | |
| 458 const int kDeadState = 4; | |
| 459 | |
| 460 int state = kInitState; | |
| 461 for (size_t i = 0; i < length && state < kColonState; ++i) { | |
| 462 const char c = data[i]; | |
| 463 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') | |
| 464 continue; | |
| 465 | |
| 466 switch (state) { | |
| 467 case kInitState: | |
| 468 if (c == '{') | |
| 469 state = kLeftBraceState; | |
| 470 else | |
| 471 state = kDeadState; | |
| 472 break; | |
| 473 case kLeftBraceState: | |
| 474 if (c == '\"' || c == '\'') | |
| 475 state = kLeftQuoteState; | |
| 476 else | |
| 477 state = kDeadState; | |
| 478 break; | |
| 479 case kLeftQuoteState: | |
| 480 if (c == ':') | |
| 481 state = kColonState; | |
| 482 break; | |
| 483 default: | |
| 484 NOTREACHED(); | |
| 485 break; | |
| 486 } | |
| 487 } | |
| 488 return state == kColonState; | |
| 489 } | |
| 490 | |
| 491 bool SiteIsolationPolicy::MatchesSignature(const char* raw_data, | |
| 492 size_t raw_length, | |
| 493 const char* signatures[], | |
| 494 size_t arr_size) { | |
| 495 size_t start = 0; | |
| 496 // Skip white characters at the beginning of the document. | |
| 497 for (start = 0; start < raw_length; ++start) { | |
| 498 char c = raw_data[start]; | |
| 499 if (!(c == ' ' || c == '\t' || c == '\r' || c == '\n')) | |
| 500 break; | |
| 501 } | |
| 502 | |
| 503 // There is no not-whitespace character in this document. | |
| 504 if (!(start < raw_length)) | |
| 505 return false; | |
| 506 | |
| 507 const char* data = raw_data + start; | |
| 508 size_t length = raw_length - start; | |
| 509 | |
| 510 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { | |
| 511 const char* signature = signatures[sig_index]; | |
| 512 size_t signature_length = strlen(signature); | |
| 513 | |
| 514 if (length < signature_length) | |
| 515 continue; | |
| 516 | |
| 517 if (!base::strncasecmp(signature, data, signature_length)) | |
| 518 return true; | |
| 519 } | |
| 520 return false; | |
| 521 } | |
| 522 | |
| 523 bool SiteIsolationPolicy::IsRenderableStatusCodeForDocument(int status_code) { | |
| 524 // Chrome only uses the content of a response with one of these status codes | |
| 525 // for CSS/JavaScript. For images, Chrome just ignores status code. | |
| 526 const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302, | |
| 527 303, 305, 306, 307}; | |
| 528 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { | |
| 529 if (renderable_status_code[i] == status_code) | |
| 530 return true; | |
| 531 } | |
| 532 return false; | |
| 533 } | |
| 534 | |
| 535 bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) { | |
| 536 // TODO(dsjang): This is a real hack. The only purpose of this function is to | |
| 537 // try to see if there's any possibility that this data can be JavaScript | |
| 538 // (superset of JS). This function will be removed once UMA stats are | |
| 539 // gathered. | |
| 540 | |
| 541 // Search for "var " for JS detection. | |
| 542 for (size_t i = 0; i < length - 3; ++i) { | |
| 543 if (strncmp(data + i, "var ", 4) == 0) | |
|
awong
2013/08/26 20:31:41
For safe use of strncmp, your "n" should always be
| |
| 544 return true; | |
| 545 } | |
| 546 return false; | |
| 547 } | |
| 548 | |
| 549 SiteIsolationPolicy::RequestIdToMetaDataMap* | |
| 550 SiteIsolationPolicy::GetRequestIdToMetaDataMap() { | |
| 551 CR_DEFINE_STATIC_LOCAL(RequestIdToMetaDataMap, metadata_map_, ()); | |
|
awong
2013/08/26 20:31:41
This variable is declared like a static local, but
| |
| 552 return &metadata_map_; | |
| 553 } | |
| 554 | |
| 555 SiteIsolationPolicy::RequestIdToResultMap* | |
| 556 SiteIsolationPolicy::GetRequestIdToResultMap() { | |
| 557 CR_DEFINE_STATIC_LOCAL(RequestIdToResultMap, result_map_, ()); | |
| 558 return &result_map_; | |
| 559 } | |
| 560 | |
| 561 } // namespace content | |
| OLD | NEW |