Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
|
alexmos
2015/06/13 00:27:03
2015
ncarter (slow)
2015/06/16 22:43:22
Done.
| |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/child/site_isolation_policy.h" | 5 #include "content/common/cross_site_document_classifier.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
| 8 #include "base/command_line.h" | 8 #include "base/command_line.h" |
| 9 #include "base/lazy_instance.h" | 9 #include "base/lazy_instance.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "base/metrics/histogram.h" | 11 #include "base/metrics/histogram.h" |
| 12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
| 13 #include "content/public/common/content_switches.h" | 13 #include "content/public/common/content_switches.h" |
| 14 #include "content/public/common/resource_response_info.h" | 14 #include "content/public/common/resource_response_info.h" |
| 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| 16 #include "net/http/http_response_headers.h" | 16 #include "net/http/http_response_headers.h" |
| 17 | 17 |
| 18 using base::StringPiece; | 18 using base::StringPiece; |
| 19 | 19 |
| 20 namespace content { | 20 namespace content { |
| 21 | 21 |
| 22 namespace { | 22 namespace { |
| 23 | 23 |
| 24 // The gathering of UMA stats for site isolation is deactivated by default, and | |
| 25 // only activated in renderer processes. | |
| 26 static bool g_stats_gathering_enabled = false; | |
| 27 | |
| 28 // MIME types | 24 // MIME types |
| 29 const char kTextHtml[] = "text/html"; | 25 const char kTextHtml[] = "text/html"; |
| 30 const char kTextXml[] = "text/xml"; | 26 const char kTextXml[] = "text/xml"; |
| 31 const char xAppRssXml[] = "application/rss+xml"; | 27 const char xAppRssXml[] = "application/rss+xml"; |
| 32 const char kAppXml[] = "application/xml"; | 28 const char kAppXml[] = "application/xml"; |
| 33 const char kAppJson[] = "application/json"; | 29 const char kAppJson[] = "application/json"; |
| 34 const char kTextJson[] = "text/json"; | 30 const char kTextJson[] = "text/json"; |
| 35 const char kTextXjson[] = "text/x-json"; | 31 const char kTextXjson[] = "text/x-json"; |
| 36 const char kTextPlain[] = "text/plain"; | 32 const char kTextPlain[] = "text/plain"; |
| 37 | 33 |
| 38 // TODO(dsjang): This is only needed for collecting UMA stats. It will be deleted | |
| 39 // when this class is used for actual blocking. | |
| 40 bool IsRenderableStatusCode(int status_code) { | |
| 41 // Chrome only uses the content of a response with one of these status codes | |
| 42 // for CSS/JavaScript. For images, Chrome just ignores status code. | |
| 43 const int renderable_status_code[] = { | |
| 44 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307}; | |
| 45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { | |
| 46 if (renderable_status_code[i] == status_code) | |
| 47 return true; | |
| 48 } | |
| 49 return false; | |
| 50 } | |
| 51 | |
| 52 bool MatchesSignature(StringPiece data, | 34 bool MatchesSignature(StringPiece data, |
| 53 const StringPiece signatures[], | 35 const StringPiece signatures[], |
| 54 size_t arr_size) { | 36 size_t arr_size) { |
| 55 size_t offset = data.find_first_not_of(" \t\r\n"); | 37 size_t offset = data.find_first_not_of(" \t\r\n"); |
| 56 // There is no non-whitespace character in this document. | 38 // There is no non-whitespace character in this document. |
| 57 if (offset == base::StringPiece::npos) | 39 if (offset == base::StringPiece::npos) |
| 58 return false; | 40 return false; |
| 59 | 41 |
| 60 data.remove_prefix(offset); | 42 data.remove_prefix(offset); |
| 61 size_t length = data.length(); | 43 size_t length = data.length(); |
| 62 | 44 |
| 63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { | 45 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { |
| 64 const StringPiece& signature = signatures[sig_index]; | 46 const StringPiece& signature = signatures[sig_index]; |
| 65 size_t signature_length = signature.length(); | 47 size_t signature_length = signature.length(); |
| 66 if (length < signature_length) | 48 if (length < signature_length) |
| 67 continue; | 49 continue; |
| 68 | 50 |
| 69 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length, | 51 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length, |
| 70 signature.data())) | 52 signature.data())) |
| 71 return true; | 53 return true; |
| 72 } | 54 } |
| 73 return false; | 55 return false; |
| 74 } | 56 } |
| 75 | 57 |
| 76 void IncrementHistogramCount(const std::string& name) { | |
| 77 // The default value of min, max, bucket_count are copied from histogram.h. | |
| 78 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( | |
| 79 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); | |
| 80 histogram_pointer->Add(1); | |
| 81 } | |
| 82 | |
| 83 void IncrementHistogramEnum(const std::string& name, | |
| 84 uint32 sample, | |
| 85 uint32 boundary_value) { | |
| 86 // The default value of min, max, bucket_count are copied from histogram.h. | |
| 87 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet( | |
| 88 name, 1, boundary_value, boundary_value + 1, | |
| 89 base::HistogramBase::kUmaTargetedHistogramFlag); | |
| 90 histogram_pointer->Add(sample); | |
| 91 } | |
| 92 | |
| 93 void HistogramCountBlockedResponse( | |
| 94 const std::string& bucket_prefix, | |
| 95 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, | |
| 96 bool nosniff_block) { | |
| 97 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked"); | |
| 98 IncrementHistogramCount(bucket_prefix + block_label); | |
| 99 | |
| 100 // The content is blocked if it is sniffed as HTML/JSON/XML. When | |
| 101 // the blocked response has an error status code, blocking it is not | |
| 102 // disruptive for the following reasons: 1) the blocked content is | |
| 103 // not a binary object (such as an image), since it is sniffed as | |
| 104 // text; 2) blocking therefore breaks renderer behavior | |
| 105 // only if the content is either JavaScript or CSS. However, the renderer | |
| 106 // doesn't use the contents of JS/CSS with a non-renderable status code | |
| 107 // (e.g., 404); 3) the renderer is expected not to use the cross-site | |
| 108 // document content for purposes other than JS/CSS (e.g., XHR). | |
| 109 bool renderable_status_code = | |
| 110 IsRenderableStatusCode(resp_data->http_status_code); | |
| 111 | |
| 112 if (renderable_status_code) { | |
| 113 IncrementHistogramEnum( | |
| 114 bucket_prefix + block_label + ".RenderableStatusCode", | |
| 115 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE); | |
| 116 } else { | |
| 117 IncrementHistogramCount(bucket_prefix + block_label + | |
| 118 ".NonRenderableStatusCode"); | |
| 119 } | |
| 120 } | |
| 121 | |
| 122 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix, | |
| 123 bool sniffed_as_js) { | |
| 124 IncrementHistogramCount(bucket_prefix + ".NotBlocked"); | |
| 125 if (sniffed_as_js) | |
| 126 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); | |
| 127 } | |
| 128 | |
| 129 } // namespace | 58 } // namespace |
| 130 | 59 |
| 131 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() { | |
| 132 } | |
| 133 | |
| 134 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) { | |
| 135 g_stats_gathering_enabled = enabled; | |
| 136 } | |
| 137 | |
| 138 linked_ptr<SiteIsolationResponseMetaData> | |
| 139 SiteIsolationStatsGatherer::OnReceivedResponse( | |
| 140 const GURL& frame_origin, | |
| 141 const GURL& response_url, | |
| 142 ResourceType resource_type, | |
| 143 int origin_pid, | |
| 144 const ResourceResponseInfo& info) { | |
| 145 if (!g_stats_gathering_enabled) | |
| 146 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 147 | |
| 148 // If |origin_pid| is non-zero, it means that this response is for a plugin | |
| 149 // spawned from this renderer process. We exclude responses for plugins for | |
| 150 // now, but eventually, we're going to make plugin processes directly talk to | |
| 151 // the browser process so that we don't apply cross-site document blocking to | |
| 152 // them. | |
| 153 if (origin_pid) | |
| 154 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 155 | |
| 156 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); | |
| 157 | |
| 158 // See if this is for navigation. If it is, don't block it, under the | |
| 159 // assumption that we will put it in an appropriate process. | |
| 160 if (IsResourceTypeFrame(resource_type)) | |
| 161 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 162 | |
| 163 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url)) | |
| 164 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 165 | |
| 166 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url)) | |
| 167 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 168 | |
| 169 CrossSiteDocumentMimeType canonical_mime_type = | |
| 170 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type); | |
| 171 | |
| 172 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS) | |
| 173 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 174 | |
| 175 // Every CORS request should have the Access-Control-Allow-Origin header even | |
| 176 // if it is preceded by a pre-flight request. Therefore, if this is a CORS | |
| 177 // request, it has this header. response.httpHeaderField() internally uses | |
| 178 // case-insensitive matching for the header name. | |
| 179 std::string access_control_origin; | |
| 180 | |
| 181 // We can use a case-insensitive header name for EnumerateHeader(). | |
| 182 info.headers->EnumerateHeader(NULL, "access-control-allow-origin", | |
| 183 &access_control_origin); | |
| 184 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet( | |
| 185 frame_origin, response_url, access_control_origin)) | |
| 186 return linked_ptr<SiteIsolationResponseMetaData>(); | |
| 187 | |
| 188 // Real XSD data collection starts from here. | |
| 189 std::string no_sniff; | |
| 190 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); | |
| 191 | |
| 192 linked_ptr<SiteIsolationResponseMetaData> resp_data( | |
| 193 new SiteIsolationResponseMetaData); | |
| 194 resp_data->frame_origin = frame_origin.spec(); | |
| 195 resp_data->response_url = response_url; | |
| 196 resp_data->resource_type = resource_type; | |
| 197 resp_data->canonical_mime_type = canonical_mime_type; | |
| 198 resp_data->http_status_code = info.headers->response_code(); | |
| 199 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff"); | |
| 200 | |
| 201 return resp_data; | |
| 202 } | |
| 203 | |
| 204 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( | |
| 205 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, | |
| 206 const char* raw_data, | |
| 207 int raw_length) { | |
| 208 if (!g_stats_gathering_enabled) | |
| 209 return false; | |
| 210 | |
| 211 DCHECK(resp_data.get()); | |
| 212 | |
| 213 StringPiece data(raw_data, raw_length); | |
| 214 | |
| 215 // Record the length of the first received chunk of data to see if it's enough | |
| 216 // for sniffing. | |
| 217 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); | |
| 218 | |
| 219 // Record the number of cross-site document responses with a specific mime | |
| 220 // type (text/html, text/xml, etc). | |
| 221 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", | |
| 222 resp_data->canonical_mime_type, | |
| 223 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX); | |
| 224 | |
| 225 // Store the result of cross-site document blocking analysis. | |
| 226 bool would_block = false; | |
| 227 bool sniffed_as_js = SniffForJS(data); | |
| 228 | |
| 229 // Record the number of responses whose content is sniffed for what its mime | |
| 230 // type claims it to be. For example, we apply an HTML sniffer for a document | |
| 231 // tagged with text/html here. Whenever this check becomes true, we'll block | |
| 232 // the response. | |
| 233 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) { | |
| 234 std::string bucket_prefix; | |
| 235 bool sniffed_as_target_document = false; | |
| 236 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) { | |
| 237 bucket_prefix = "SiteIsolation.XSD.HTML"; | |
| 238 sniffed_as_target_document = | |
| 239 CrossSiteDocumentClassifier::SniffForHTML(data); | |
| 240 } else if (resp_data->canonical_mime_type == | |
| 241 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) { | |
| 242 bucket_prefix = "SiteIsolation.XSD.XML"; | |
| 243 sniffed_as_target_document = | |
| 244 CrossSiteDocumentClassifier::SniffForXML(data); | |
| 245 } else if (resp_data->canonical_mime_type == | |
| 246 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) { | |
| 247 bucket_prefix = "SiteIsolation.XSD.JSON"; | |
| 248 sniffed_as_target_document = | |
| 249 CrossSiteDocumentClassifier::SniffForJSON(data); | |
| 250 } else { | |
| 251 NOTREACHED() << "Not a blockable mime type: " | |
| 252 << resp_data->canonical_mime_type; | |
| 253 } | |
| 254 | |
| 255 if (sniffed_as_target_document) { | |
| 256 would_block = true; | |
| 257 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); | |
| 258 } else { | |
| 259 if (resp_data->no_sniff) { | |
| 260 would_block = true; | |
| 261 HistogramCountBlockedResponse(bucket_prefix, resp_data, true); | |
| 262 } else { | |
| 263 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); | |
| 264 } | |
| 265 } | |
| 266 } else { | |
| 267 // This block is for plain text documents. We apply our HTML, XML, | |
| 268 // and JSON sniffers to a text document in that order, and block it | |
| 269 // if any of them succeeds in sniffing. | |
| 270 std::string bucket_prefix; | |
| 271 if (CrossSiteDocumentClassifier::SniffForHTML(data)) | |
| 272 bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; | |
| 273 else if (CrossSiteDocumentClassifier::SniffForXML(data)) | |
| 274 bucket_prefix = "SiteIsolation.XSD.Plain.XML"; | |
| 275 else if (CrossSiteDocumentClassifier::SniffForJSON(data)) | |
| 276 bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; | |
| 277 | |
| 278 if (bucket_prefix.size() > 0) { | |
| 279 would_block = true; | |
| 280 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); | |
| 281 } else if (resp_data->no_sniff) { | |
| 282 would_block = true; | |
| 283 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); | |
| 284 } else { | |
| 285 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", | |
| 286 sniffed_as_js); | |
| 287 } | |
| 288 } | |
| 289 | |
| 290 return would_block; | |
| 291 } | |
| 292 | |
| 293 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( | 60 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( |
| 294 const std::string& mime_type) { | 61 const std::string& mime_type) { |
| 295 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { | 62 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { |
| 296 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; | 63 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; |
| 297 } | 64 } |
| 298 | 65 |
| 299 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { | 66 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { |
| 300 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; | 67 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; |
| 301 } | 68 } |
| 302 | 69 |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 476 break; | 243 break; |
| 477 case kColonState: | 244 case kColonState: |
| 478 case kTerminalState: | 245 case kTerminalState: |
| 479 NOTREACHED(); | 246 NOTREACHED(); |
| 480 break; | 247 break; |
| 481 } | 248 } |
| 482 } | 249 } |
| 483 return state == kColonState; | 250 return state == kColonState; |
| 484 } | 251 } |
| 485 | 252 |
| 486 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) { | |
| 487 // The purpose of this function is to try to see if there's any possibility | |
| 488 // that this data can be JavaScript (superset of JS). Search for "var " for JS | |
| 489 // detection. This is a real hack and should only be used for stats gathering. | |
| 490 return data.find("var ") != base::StringPiece::npos; | |
| 491 } | |
| 492 | |
| 493 } // namespace content | 253 } // namespace content |
| OLD | NEW |