| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/child/site_isolation_policy.h" | 5 #include "content/child/site_isolation_stats_gatherer.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | |
| 8 #include "base/command_line.h" | |
| 9 #include "base/lazy_instance.h" | |
| 10 #include "base/logging.h" | |
| 11 #include "base/metrics/histogram.h" | 7 #include "base/metrics/histogram.h" |
| 8 #include "base/strings/string_piece.h" |
| 12 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 13 #include "content/public/common/content_switches.h" | |
| 14 #include "content/public/common/resource_response_info.h" | 10 #include "content/public/common/resource_response_info.h" |
| 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
| 16 #include "net/http/http_response_headers.h" | 11 #include "net/http/http_response_headers.h" |
| 17 | 12 |
| 18 using base::StringPiece; | |
| 19 | |
| 20 namespace content { | 13 namespace content { |
| 21 | 14 |
| 22 namespace { | 15 namespace { |
| 23 | 16 |
| 24 // The gathering of UMA stats for site isolation is deactivated by default, and | 17 // The gathering of UMA stats for site isolation is deactivated by default, and |
| 25 // only activated in renderer processes. | 18 // only activated in renderer processes. |
| 26 static bool g_stats_gathering_enabled = false; | 19 static bool g_stats_gathering_enabled = false; |
| 27 | 20 |
| 28 // MIME types | |
| 29 const char kTextHtml[] = "text/html"; | |
| 30 const char kTextXml[] = "text/xml"; | |
| 31 const char xAppRssXml[] = "application/rss+xml"; | |
| 32 const char kAppXml[] = "application/xml"; | |
| 33 const char kAppJson[] = "application/json"; | |
| 34 const char kTextJson[] = "text/json"; | |
| 35 const char kTextXjson[] = "text/x-json"; | |
| 36 const char kTextPlain[] = "text/plain"; | |
| 37 | |
| 38 // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted | |
| 39 // when this class is used for actual blocking. | |
| 40 bool IsRenderableStatusCode(int status_code) { | 21 bool IsRenderableStatusCode(int status_code) { |
| 41 // Chrome only uses the content of a response with one of these status codes | 22 // Chrome only uses the content of a response with one of these status codes |
| 42 // for CSS/JavaScript. For images, Chrome just ignores status code. | 23 // for CSS/JavaScript. For images, Chrome just ignores status code. |
| 43 const int renderable_status_code[] = { | 24 const int renderable_status_code[] = { |
| 44 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307}; | 25 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307}; |
| 45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { | 26 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { |
| 46 if (renderable_status_code[i] == status_code) | 27 if (renderable_status_code[i] == status_code) |
| 47 return true; | 28 return true; |
| 48 } | 29 } |
| 49 return false; | 30 return false; |
| 50 } | 31 } |
| 51 | 32 |
| 52 bool MatchesSignature(StringPiece data, | |
| 53 const StringPiece signatures[], | |
| 54 size_t arr_size) { | |
| 55 size_t offset = data.find_first_not_of(" \t\r\n"); | |
| 56 // There is no not-whitespace character in this document. | |
| 57 if (offset == base::StringPiece::npos) | |
| 58 return false; | |
| 59 | |
| 60 data.remove_prefix(offset); | |
| 61 size_t length = data.length(); | |
| 62 | |
| 63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { | |
| 64 const StringPiece& signature = signatures[sig_index]; | |
| 65 size_t signature_length = signature.length(); | |
| 66 if (length < signature_length) | |
| 67 continue; | |
| 68 | |
| 69 if (base::LowerCaseEqualsASCII(data.begin(), | |
| 70 data.begin() + signature_length, | |
| 71 signature.data())) | |
| 72 return true; | |
| 73 } | |
| 74 return false; | |
| 75 } | |
| 76 | |
| 77 void IncrementHistogramCount(const std::string& name) { | 33 void IncrementHistogramCount(const std::string& name) { |
| 78 // The default value of min, max, bucket_count are copied from histogram.h. | 34 // The default value of min, max, bucket_count are copied from histogram.h. |
| 79 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( | 35 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( |
| 80 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); | 36 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); |
| 81 histogram_pointer->Add(1); | 37 histogram_pointer->Add(1); |
| 82 } | 38 } |
| 83 | 39 |
| 84 void IncrementHistogramEnum(const std::string& name, | 40 void IncrementHistogramEnum(const std::string& name, |
| 85 uint32 sample, | 41 uint32 sample, |
| 86 uint32 boundary_value) { | 42 uint32 boundary_value) { |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 204 | 160 |
| 205 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( | 161 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( |
| 206 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, | 162 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, |
| 207 const char* raw_data, | 163 const char* raw_data, |
| 208 int raw_length) { | 164 int raw_length) { |
| 209 if (!g_stats_gathering_enabled) | 165 if (!g_stats_gathering_enabled) |
| 210 return false; | 166 return false; |
| 211 | 167 |
| 212 DCHECK(resp_data.get()); | 168 DCHECK(resp_data.get()); |
| 213 | 169 |
| 214 StringPiece data(raw_data, raw_length); | 170 base::StringPiece data(raw_data, raw_length); |
| 215 | 171 |
| 216 // Record the length of the first received chunk of data to see if it's enough | 172 // Record the length of the first received chunk of data to see if it's enough |
| 217 // for sniffing. | 173 // for sniffing. |
| 218 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); | 174 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); |
| 219 | 175 |
| 220 // Record the number of cross-site document responses with a specific mime | 176 // Record the number of cross-site document responses with a specific mime |
| 221 // type (text/html, text/xml, etc). | 177 // type (text/html, text/xml, etc). |
| 222 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", | 178 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", |
| 223 resp_data->canonical_mime_type, | 179 resp_data->canonical_mime_type, |
| 224 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX); | 180 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX); |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 284 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); | 240 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); |
| 285 } else { | 241 } else { |
| 286 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", | 242 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", |
| 287 sniffed_as_js); | 243 sniffed_as_js); |
| 288 } | 244 } |
| 289 } | 245 } |
| 290 | 246 |
| 291 return would_block; | 247 return would_block; |
| 292 } | 248 } |
| 293 | 249 |
| 294 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( | 250 bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) { |
| 295 const std::string& mime_type) { | |
| 296 if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) { | |
| 297 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; | |
| 298 } | |
| 299 | |
| 300 if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) { | |
| 301 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; | |
| 302 } | |
| 303 | |
| 304 if (base::LowerCaseEqualsASCII(mime_type, kAppJson) || | |
| 305 base::LowerCaseEqualsASCII(mime_type, kTextJson) || | |
| 306 base::LowerCaseEqualsASCII(mime_type, kTextXjson)) { | |
| 307 return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON; | |
| 308 } | |
| 309 | |
| 310 if (base::LowerCaseEqualsASCII(mime_type, kTextXml) || | |
| 311 base::LowerCaseEqualsASCII(mime_type, xAppRssXml) || | |
| 312 base::LowerCaseEqualsASCII(mime_type, kAppXml)) { | |
| 313 return CROSS_SITE_DOCUMENT_MIME_TYPE_XML; | |
| 314 } | |
| 315 | |
| 316 return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS; | |
| 317 } | |
| 318 | |
| 319 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) { | |
| 320 // We exclude ftp:// from here. FTP doesn't provide a Content-Type | |
| 321 // header which our policy depends on, so we cannot protect any | |
| 322 // document from FTP servers. | |
| 323 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); | |
| 324 } | |
| 325 | |
| 326 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin, | |
| 327 const GURL& response_url) { | |
| 328 if (!frame_origin.is_valid() || !response_url.is_valid()) | |
| 329 return false; | |
| 330 | |
| 331 if (frame_origin.scheme() != response_url.scheme()) | |
| 332 return false; | |
| 333 | |
| 334 // SameDomainOrHost() extracts the effective domains (public suffix plus one) | |
| 335 // from the two URLs and compare them. | |
| 336 return net::registry_controlled_domains::SameDomainOrHost( | |
| 337 frame_origin, response_url, | |
| 338 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); | |
| 339 } | |
| 340 | |
| 341 // We don't use Webkit's existing CORS policy implementation since | |
| 342 // their policy works in terms of origins, not sites. For example, | |
| 343 // when frame is sub.a.com and it is not allowed to access a document | |
| 344 // with sub1.a.com. But under Site Isolation, it's allowed. | |
| 345 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet( | |
| 346 const GURL& frame_origin, | |
| 347 const GURL& website_origin, | |
| 348 const std::string& access_control_origin) { | |
| 349 // Many websites are sending back "\"*\"" instead of "*". This is | |
| 350 // non-standard practice, and not supported by Chrome. Refer to | |
| 351 // CrossOriginAccessControl::passesAccessControlCheck(). | |
| 352 | |
| 353 // TODO(dsjang): * is not allowed for the response from a request | |
| 354 // with cookies. This allows for more than what the renderer will | |
| 355 // eventually be able to receive, so we won't see illegal cross-site | |
| 356 // documents allowed by this. We have to find a way to see if this | |
| 357 // response is from a cookie-tagged request or not in the future. | |
| 358 if (access_control_origin == "*") | |
| 359 return true; | |
| 360 | |
| 361 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for | |
| 362 // "*", but many websites are using just a domain for access_control_origin, | |
| 363 // and this is blocked by Webkit's CORS logic here : | |
| 364 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set | |
| 365 // is_valid() to false when it is created from a URL containing * in the | |
| 366 // domain part. | |
| 367 | |
| 368 GURL cors_origin(access_control_origin); | |
| 369 return IsSameSite(frame_origin, cors_origin); | |
| 370 } | |
| 371 | |
| 372 // This function is a slight modification of |net::SniffForHTML|. | |
| 373 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) { | |
| 374 // The content sniffer used by Chrome and Firefox are using "<!--" | |
| 375 // as one of the HTML signatures, but it also appears in valid | |
| 376 // JavaScript, considered as well-formed JS by the browser. Since | |
| 377 // we do not want to block any JS, we exclude it from our HTML | |
| 378 // signatures. This can weaken our document block policy, but we can | |
| 379 // break less websites. | |
| 380 // TODO(dsjang): parameterize |net::SniffForHTML| with an option | |
| 381 // that decides whether to include <!-- or not, so that we can | |
| 382 // remove this function. | |
| 383 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser | |
| 384 // process, we should do single-thread checking here for the static | |
| 385 // initializer. | |
| 386 static const StringPiece kHtmlSignatures[] = { | |
| 387 StringPiece("<!DOCTYPE html"), // HTML5 spec | |
| 388 StringPiece("<script"), // HTML5 spec, Mozilla | |
| 389 StringPiece("<html"), // HTML5 spec, Mozilla | |
| 390 StringPiece("<head"), // HTML5 spec, Mozilla | |
| 391 StringPiece("<iframe"), // Mozilla | |
| 392 StringPiece("<h1"), // Mozilla | |
| 393 StringPiece("<div"), // Mozilla | |
| 394 StringPiece("<font"), // Mozilla | |
| 395 StringPiece("<table"), // Mozilla | |
| 396 StringPiece("<a"), // Mozilla | |
| 397 StringPiece("<style"), // Mozilla | |
| 398 StringPiece("<title"), // Mozilla | |
| 399 StringPiece("<b"), // Mozilla | |
| 400 StringPiece("<body"), // Mozilla | |
| 401 StringPiece("<br"), // Mozilla | |
| 402 StringPiece("<p"), // Mozilla | |
| 403 StringPiece("<?xml") // Mozilla | |
| 404 }; | |
| 405 | |
| 406 while (data.length() > 0) { | |
| 407 if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures))) | |
| 408 return true; | |
| 409 | |
| 410 // If we cannot find "<!--", we fail sniffing this as HTML. | |
| 411 static const StringPiece kCommentBegins[] = {StringPiece("<!--")}; | |
| 412 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins))) | |
| 413 break; | |
| 414 | |
| 415 // Search for --> and do SniffForHTML after that. If we can find the | |
| 416 // comment's end, we start HTML sniffing from there again. | |
| 417 static const char kEndComment[] = "-->"; | |
| 418 size_t offset = data.find(kEndComment); | |
| 419 if (offset == base::StringPiece::npos) | |
| 420 break; | |
| 421 | |
| 422 // Proceed to the index next to the ending comment (-->). | |
| 423 data.remove_prefix(offset + strlen(kEndComment)); | |
| 424 } | |
| 425 | |
| 426 return false; | |
| 427 } | |
| 428 | |
| 429 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) { | |
| 430 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for | |
| 431 // this signature. However, XML is case-sensitive. Don't we have to | |
| 432 // be more lenient only to block documents starting with the exact | |
| 433 // string <?xml rather than <?XML ? | |
| 434 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser | |
| 435 // process, we should do single-thread checking here for the static | |
| 436 // initializer. | |
| 437 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")}; | |
| 438 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); | |
| 439 } | |
| 440 | |
| 441 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) { | |
| 442 // TODO(dsjang): We have to come up with a better way to sniff | |
| 443 // JSON. However, even RE cannot help us that much due to the fact | |
| 444 // that we don't do full parsing. This DFA starts with state 0, and | |
| 445 // finds {, "/' and : in that order. We're avoiding adding a | |
| 446 // dependency on a regular expression library. | |
| 447 enum { | |
| 448 kStartState, | |
| 449 kLeftBraceState, | |
| 450 kLeftQuoteState, | |
| 451 kColonState, | |
| 452 kTerminalState, | |
| 453 } state = kStartState; | |
| 454 | |
| 455 size_t length = data.length(); | |
| 456 for (size_t i = 0; i < length && state < kColonState; ++i) { | |
| 457 const char c = data[i]; | |
| 458 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') | |
| 459 continue; | |
| 460 | |
| 461 switch (state) { | |
| 462 case kStartState: | |
| 463 if (c == '{') | |
| 464 state = kLeftBraceState; | |
| 465 else | |
| 466 state = kTerminalState; | |
| 467 break; | |
| 468 case kLeftBraceState: | |
| 469 if (c == '\"' || c == '\'') | |
| 470 state = kLeftQuoteState; | |
| 471 else | |
| 472 state = kTerminalState; | |
| 473 break; | |
| 474 case kLeftQuoteState: | |
| 475 if (c == ':') | |
| 476 state = kColonState; | |
| 477 break; | |
| 478 case kColonState: | |
| 479 case kTerminalState: | |
| 480 NOTREACHED(); | |
| 481 break; | |
| 482 } | |
| 483 } | |
| 484 return state == kColonState; | |
| 485 } | |
| 486 | |
| 487 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) { | |
| 488 // The purpose of this function is to try to see if there's any possibility | 251 // The purpose of this function is to try to see if there's any possibility |
| 489 // that this data can be JavaScript (superset of JS). Search for "var " for JS | 252 // that this data can be JavaScript (superset of JS). Search for "var " for JS |
| 490 // detection. This is a real hack and should only be used for stats gathering. | 253 // detection. This is a real hack and should only be used for stats gathering. |
| 491 return data.find("var ") != base::StringPiece::npos; | 254 return data.find("var ") != base::StringPiece::npos; |
| 492 } | 255 } |
| 493 | 256 |
| 494 } // namespace content | 257 } // namespace content |
| OLD | NEW |