OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/child/site_isolation_policy.h" | |
6 | |
7 #include "base/basictypes.h" | |
8 #include "base/command_line.h" | |
9 #include "base/lazy_instance.h" | |
10 #include "base/logging.h" | |
11 #include "base/metrics/histogram.h" | |
12 #include "base/strings/string_util.h" | |
13 #include "content/public/common/content_switches.h" | |
14 #include "content/public/common/resource_response_info.h" | |
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
16 #include "net/http/http_response_headers.h" | |
17 | |
18 using base::StringPiece; | |
19 | |
20 namespace content { | |
21 | |
22 namespace { | |
23 | |
// The gathering of UMA stats for site isolation is deactivated by default, and
// only activated in renderer processes. Toggled through
// SiteIsolationStatsGatherer::SetEnabled().
bool g_stats_gathering_enabled = false;

// MIME types recognized by CrossSiteDocumentClassifier::GetCanonicalMimeType().

// HTML and plain text.
const char kTextHtml[] = "text/html";
const char kTextPlain[] = "text/plain";

// XML flavors.
const char kTextXml[] = "text/xml";
const char kAppXml[] = "application/xml";
// NOTE: |xAppRssXml| does not follow the k-prefix naming used by its
// siblings; the name is kept because GetCanonicalMimeType() refers to it.
const char xAppRssXml[] = "application/rss+xml";

// JSON flavors.
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";
37 | |
// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.
//
// Returns true when |status_code| is one of the HTTP status codes for which
// Chrome actually uses the response body of a CSS/JavaScript resource. For
// images, Chrome just ignores the status code.
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
51 | |
52 bool MatchesSignature(StringPiece data, | |
53 const StringPiece signatures[], | |
54 size_t arr_size) { | |
55 size_t offset = data.find_first_not_of(" \t\r\n"); | |
56 // There is no not-whitespace character in this document. | |
57 if (offset == base::StringPiece::npos) | |
58 return false; | |
59 | |
60 data.remove_prefix(offset); | |
61 size_t length = data.length(); | |
62 | |
63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { | |
64 const StringPiece& signature = signatures[sig_index]; | |
65 size_t signature_length = signature.length(); | |
66 if (length < signature_length) | |
67 continue; | |
68 | |
69 if (base::LowerCaseEqualsASCII(data.begin(), | |
70 data.begin() + signature_length, | |
71 signature.data())) | |
72 return true; | |
73 } | |
74 return false; | |
75 } | |
76 | |
77 void IncrementHistogramCount(const std::string& name) { | |
78 // The default value of min, max, bucket_count are copied from histogram.h. | |
79 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( | |
80 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); | |
81 histogram_pointer->Add(1); | |
82 } | |
83 | |
84 void IncrementHistogramEnum(const std::string& name, | |
85 uint32 sample, | |
86 uint32 boundary_value) { | |
87 // The default value of min, max, bucket_count are copied from histogram.h. | |
88 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet( | |
89 name, 1, boundary_value, boundary_value + 1, | |
90 base::HistogramBase::kUmaTargetedHistogramFlag); | |
91 histogram_pointer->Add(sample); | |
92 } | |
93 | |
94 void HistogramCountBlockedResponse( | |
95 const std::string& bucket_prefix, | |
96 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, | |
97 bool nosniff_block) { | |
98 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked"); | |
99 IncrementHistogramCount(bucket_prefix + block_label); | |
100 | |
101 // The content is blocked if it is sniffed as HTML/JSON/XML. When | |
102 // the blocked response is with an error status code, it is not | |
103 // disruptive for the following reasons : 1) the blocked content is | |
104 // not a binary object (such as an image) since it is sniffed as | |
105 // text; 2) then, this blocking only breaks the renderer behavior | |
106 // only if it is either JavaScript or CSS. However, the renderer | |
107 // doesn't use the contents of JS/CSS with unaffected status code | |
108 // (e.g, 404). 3) the renderer is expected not to use the cross-site | |
109 // document content for purposes other than JS/CSS (e.g, XHR). | |
110 bool renderable_status_code = | |
111 IsRenderableStatusCode(resp_data->http_status_code); | |
112 | |
113 if (renderable_status_code) { | |
114 IncrementHistogramEnum( | |
115 bucket_prefix + block_label + ".RenderableStatusCode", | |
116 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE); | |
117 } else { | |
118 IncrementHistogramCount(bucket_prefix + block_label + | |
119 ".NonRenderableStatusCode"); | |
120 } | |
121 } | |
122 | |
123 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix, | |
124 bool sniffed_as_js) { | |
125 IncrementHistogramCount(bucket_prefix + ".NotBlocked"); | |
126 if (sniffed_as_js) | |
127 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); | |
128 } | |
129 | |
130 } // namespace | |
131 | |
132 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() { | |
133 } | |
134 | |
135 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) { | |
136 g_stats_gathering_enabled = enabled; | |
137 } | |
138 | |
139 linked_ptr<SiteIsolationResponseMetaData> | |
140 SiteIsolationStatsGatherer::OnReceivedResponse( | |
141 const GURL& frame_origin, | |
142 const GURL& response_url, | |
143 ResourceType resource_type, | |
144 int origin_pid, | |
145 const ResourceResponseInfo& info) { | |
146 if (!g_stats_gathering_enabled) | |
147 return linked_ptr<SiteIsolationResponseMetaData>(); | |
148 | |
149 // if |origin_pid| is non-zero, it means that this response is for a plugin | |
150 // spawned from this renderer process. We exclude responses for plugins for | |
151 // now, but eventually, we're going to make plugin processes directly talk to | |
152 // the browser process so that we don't apply cross-site document blocking to | |
153 // them. | |
154 if (origin_pid) | |
155 return linked_ptr<SiteIsolationResponseMetaData>(); | |
156 | |
157 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); | |
158 | |
159 // See if this is for navigation. If it is, don't block it, under the | |
160 // assumption that we will put it in an appropriate process. | |
161 if (IsResourceTypeFrame(resource_type)) | |
162 return linked_ptr<SiteIsolationResponseMetaData>(); | |
163 | |
164 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url)) | |
165 return linked_ptr<SiteIsolationResponseMetaData>(); | |
166 | |
167 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url)) | |
168 return linked_ptr<SiteIsolationResponseMetaData>(); | |
169 | |
170 CrossSiteDocumentMimeType canonical_mime_type = | |
171 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type); | |
172 | |
173 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS) | |
174 return linked_ptr<SiteIsolationResponseMetaData>(); | |
175 | |
176 // Every CORS request should have the Access-Control-Allow-Origin header even | |
177 // if it is preceded by a pre-flight request. Therefore, if this is a CORS | |
178 // request, it has this header. response.httpHeaderField() internally uses | |
179 // case-insensitive matching for the header name. | |
180 std::string access_control_origin; | |
181 | |
182 // We can use a case-insensitive header name for EnumerateHeader(). | |
183 info.headers->EnumerateHeader(NULL, "access-control-allow-origin", | |
184 &access_control_origin); | |
185 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet( | |
186 frame_origin, response_url, access_control_origin)) | |
187 return linked_ptr<SiteIsolationResponseMetaData>(); | |
188 | |
189 // Real XSD data collection starts from here. | |
190 std::string no_sniff; | |
191 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); | |
192 | |
193 linked_ptr<SiteIsolationResponseMetaData> resp_data( | |
194 new SiteIsolationResponseMetaData); | |
195 resp_data->frame_origin = frame_origin.spec(); | |
196 resp_data->response_url = response_url; | |
197 resp_data->resource_type = resource_type; | |
198 resp_data->canonical_mime_type = canonical_mime_type; | |
199 resp_data->http_status_code = info.headers->response_code(); | |
200 resp_data->no_sniff = base::LowerCaseEqualsASCII(no_sniff, "nosniff"); | |
201 | |
202 return resp_data; | |
203 } | |
204 | |
205 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( | |
206 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, | |
207 const char* raw_data, | |
208 int raw_length) { | |
209 if (!g_stats_gathering_enabled) | |
210 return false; | |
211 | |
212 DCHECK(resp_data.get()); | |
213 | |
214 StringPiece data(raw_data, raw_length); | |
215 | |
216 // Record the length of the first received chunk of data to see if it's enough | |
217 // for sniffing. | |
218 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); | |
219 | |
220 // Record the number of cross-site document responses with a specific mime | |
221 // type (text/html, text/xml, etc). | |
222 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", | |
223 resp_data->canonical_mime_type, | |
224 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX); | |
225 | |
226 // Store the result of cross-site document blocking analysis. | |
227 bool would_block = false; | |
228 bool sniffed_as_js = SniffForJS(data); | |
229 | |
230 // Record the number of responses whose content is sniffed for what its mime | |
231 // type claims it to be. For example, we apply a HTML sniffer for a document | |
232 // tagged with text/html here. Whenever this check becomes true, we'll block | |
233 // the response. | |
234 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) { | |
235 std::string bucket_prefix; | |
236 bool sniffed_as_target_document = false; | |
237 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) { | |
238 bucket_prefix = "SiteIsolation.XSD.HTML"; | |
239 sniffed_as_target_document = | |
240 CrossSiteDocumentClassifier::SniffForHTML(data); | |
241 } else if (resp_data->canonical_mime_type == | |
242 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) { | |
243 bucket_prefix = "SiteIsolation.XSD.XML"; | |
244 sniffed_as_target_document = | |
245 CrossSiteDocumentClassifier::SniffForXML(data); | |
246 } else if (resp_data->canonical_mime_type == | |
247 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) { | |
248 bucket_prefix = "SiteIsolation.XSD.JSON"; | |
249 sniffed_as_target_document = | |
250 CrossSiteDocumentClassifier::SniffForJSON(data); | |
251 } else { | |
252 NOTREACHED() << "Not a blockable mime type: " | |
253 << resp_data->canonical_mime_type; | |
254 } | |
255 | |
256 if (sniffed_as_target_document) { | |
257 would_block = true; | |
258 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); | |
259 } else { | |
260 if (resp_data->no_sniff) { | |
261 would_block = true; | |
262 HistogramCountBlockedResponse(bucket_prefix, resp_data, true); | |
263 } else { | |
264 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); | |
265 } | |
266 } | |
267 } else { | |
268 // This block is for plain text documents. We apply our HTML, XML, | |
269 // and JSON sniffer to a text document in the order, and block it | |
270 // if any of them succeeds in sniffing. | |
271 std::string bucket_prefix; | |
272 if (CrossSiteDocumentClassifier::SniffForHTML(data)) | |
273 bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; | |
274 else if (CrossSiteDocumentClassifier::SniffForXML(data)) | |
275 bucket_prefix = "SiteIsolation.XSD.Plain.XML"; | |
276 else if (CrossSiteDocumentClassifier::SniffForJSON(data)) | |
277 bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; | |
278 | |
279 if (bucket_prefix.size() > 0) { | |
280 would_block = true; | |
281 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); | |
282 } else if (resp_data->no_sniff) { | |
283 would_block = true; | |
284 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); | |
285 } else { | |
286 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", | |
287 sniffed_as_js); | |
288 } | |
289 } | |
290 | |
291 return would_block; | |
292 } | |
293 | |
294 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( | |
295 const std::string& mime_type) { | |
296 if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) { | |
297 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; | |
298 } | |
299 | |
300 if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) { | |
301 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; | |
302 } | |
303 | |
304 if (base::LowerCaseEqualsASCII(mime_type, kAppJson) || | |
305 base::LowerCaseEqualsASCII(mime_type, kTextJson) || | |
306 base::LowerCaseEqualsASCII(mime_type, kTextXjson)) { | |
307 return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON; | |
308 } | |
309 | |
310 if (base::LowerCaseEqualsASCII(mime_type, kTextXml) || | |
311 base::LowerCaseEqualsASCII(mime_type, xAppRssXml) || | |
312 base::LowerCaseEqualsASCII(mime_type, kAppXml)) { | |
313 return CROSS_SITE_DOCUMENT_MIME_TYPE_XML; | |
314 } | |
315 | |
316 return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS; | |
317 } | |
318 | |
319 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) { | |
320 // We exclude ftp:// from here. FTP doesn't provide a Content-Type | |
321 // header which our policy depends on, so we cannot protect any | |
322 // document from FTP servers. | |
323 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); | |
324 } | |
325 | |
326 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin, | |
327 const GURL& response_url) { | |
328 if (!frame_origin.is_valid() || !response_url.is_valid()) | |
329 return false; | |
330 | |
331 if (frame_origin.scheme() != response_url.scheme()) | |
332 return false; | |
333 | |
334 // SameDomainOrHost() extracts the effective domains (public suffix plus one) | |
335 // from the two URLs and compare them. | |
336 return net::registry_controlled_domains::SameDomainOrHost( | |
337 frame_origin, response_url, | |
338 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); | |
339 } | |
340 | |
341 // We don't use Webkit's existing CORS policy implementation since | |
342 // their policy works in terms of origins, not sites. For example, | |
343 // when frame is sub.a.com and it is not allowed to access a document | |
344 // with sub1.a.com. But under Site Isolation, it's allowed. | |
345 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet( | |
346 const GURL& frame_origin, | |
347 const GURL& website_origin, | |
348 const std::string& access_control_origin) { | |
349 // Many websites are sending back "\"*\"" instead of "*". This is | |
350 // non-standard practice, and not supported by Chrome. Refer to | |
351 // CrossOriginAccessControl::passesAccessControlCheck(). | |
352 | |
353 // TODO(dsjang): * is not allowed for the response from a request | |
354 // with cookies. This allows for more than what the renderer will | |
355 // eventually be able to receive, so we won't see illegal cross-site | |
356 // documents allowed by this. We have to find a way to see if this | |
357 // response is from a cookie-tagged request or not in the future. | |
358 if (access_control_origin == "*") | |
359 return true; | |
360 | |
361 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for | |
362 // "*", but many websites are using just a domain for access_control_origin, | |
363 // and this is blocked by Webkit's CORS logic here : | |
364 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set | |
365 // is_valid() to false when it is created from a URL containing * in the | |
366 // domain part. | |
367 | |
368 GURL cors_origin(access_control_origin); | |
369 return IsSameSite(frame_origin, cors_origin); | |
370 } | |
371 | |
372 // This function is a slight modification of |net::SniffForHTML|. | |
373 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) { | |
374 // The content sniffer used by Chrome and Firefox are using "<!--" | |
375 // as one of the HTML signatures, but it also appears in valid | |
376 // JavaScript, considered as well-formed JS by the browser. Since | |
377 // we do not want to block any JS, we exclude it from our HTML | |
378 // signatures. This can weaken our document block policy, but we can | |
379 // break less websites. | |
380 // TODO(dsjang): parameterize |net::SniffForHTML| with an option | |
381 // that decides whether to include <!-- or not, so that we can | |
382 // remove this function. | |
383 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser | |
384 // process, we should do single-thread checking here for the static | |
385 // initializer. | |
386 static const StringPiece kHtmlSignatures[] = { | |
387 StringPiece("<!DOCTYPE html"), // HTML5 spec | |
388 StringPiece("<script"), // HTML5 spec, Mozilla | |
389 StringPiece("<html"), // HTML5 spec, Mozilla | |
390 StringPiece("<head"), // HTML5 spec, Mozilla | |
391 StringPiece("<iframe"), // Mozilla | |
392 StringPiece("<h1"), // Mozilla | |
393 StringPiece("<div"), // Mozilla | |
394 StringPiece("<font"), // Mozilla | |
395 StringPiece("<table"), // Mozilla | |
396 StringPiece("<a"), // Mozilla | |
397 StringPiece("<style"), // Mozilla | |
398 StringPiece("<title"), // Mozilla | |
399 StringPiece("<b"), // Mozilla | |
400 StringPiece("<body"), // Mozilla | |
401 StringPiece("<br"), // Mozilla | |
402 StringPiece("<p"), // Mozilla | |
403 StringPiece("<?xml") // Mozilla | |
404 }; | |
405 | |
406 while (data.length() > 0) { | |
407 if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures))) | |
408 return true; | |
409 | |
410 // If we cannot find "<!--", we fail sniffing this as HTML. | |
411 static const StringPiece kCommentBegins[] = {StringPiece("<!--")}; | |
412 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins))) | |
413 break; | |
414 | |
415 // Search for --> and do SniffForHTML after that. If we can find the | |
416 // comment's end, we start HTML sniffing from there again. | |
417 static const char kEndComment[] = "-->"; | |
418 size_t offset = data.find(kEndComment); | |
419 if (offset == base::StringPiece::npos) | |
420 break; | |
421 | |
422 // Proceed to the index next to the ending comment (-->). | |
423 data.remove_prefix(offset + strlen(kEndComment)); | |
424 } | |
425 | |
426 return false; | |
427 } | |
428 | |
429 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) { | |
430 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for | |
431 // this signature. However, XML is case-sensitive. Don't we have to | |
432 // be more lenient only to block documents starting with the exact | |
433 // string <?xml rather than <?XML ? | |
434 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser | |
435 // process, we should do single-thread checking here for the static | |
436 // initializer. | |
437 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")}; | |
438 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); | |
439 } | |
440 | |
441 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) { | |
442 // TODO(dsjang): We have to come up with a better way to sniff | |
443 // JSON. However, even RE cannot help us that much due to the fact | |
444 // that we don't do full parsing. This DFA starts with state 0, and | |
445 // finds {, "/' and : in that order. We're avoiding adding a | |
446 // dependency on a regular expression library. | |
447 enum { | |
448 kStartState, | |
449 kLeftBraceState, | |
450 kLeftQuoteState, | |
451 kColonState, | |
452 kTerminalState, | |
453 } state = kStartState; | |
454 | |
455 size_t length = data.length(); | |
456 for (size_t i = 0; i < length && state < kColonState; ++i) { | |
457 const char c = data[i]; | |
458 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') | |
459 continue; | |
460 | |
461 switch (state) { | |
462 case kStartState: | |
463 if (c == '{') | |
464 state = kLeftBraceState; | |
465 else | |
466 state = kTerminalState; | |
467 break; | |
468 case kLeftBraceState: | |
469 if (c == '\"' || c == '\'') | |
470 state = kLeftQuoteState; | |
471 else | |
472 state = kTerminalState; | |
473 break; | |
474 case kLeftQuoteState: | |
475 if (c == ':') | |
476 state = kColonState; | |
477 break; | |
478 case kColonState: | |
479 case kTerminalState: | |
480 NOTREACHED(); | |
481 break; | |
482 } | |
483 } | |
484 return state == kColonState; | |
485 } | |
486 | |
487 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) { | |
488 // The purpose of this function is to try to see if there's any possibility | |
489 // that this data can be JavaScript (superset of JS). Search for "var " for JS | |
490 // detection. This is a real hack and should only be used for stats gathering. | |
491 return data.find("var ") != base::StringPiece::npos; | |
492 } | |
493 | |
494 } // namespace content | |
OLD | NEW |