Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(494)

Side by Side Diff: content/child/site_isolation_policy.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3
Patch Set: Re-upload. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/child/site_isolation_policy.h"
6
7 #include "base/basictypes.h"
8 #include "base/command_line.h"
9 #include "base/lazy_instance.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_util.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/public/common/resource_response_info.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/http/http_response_headers.h"
17
18 using base::StringPiece;
19
20 namespace content {
21
22 namespace {
23
// The gathering of UMA stats for site isolation is deactivated by default, and
// only activated in renderer processes. The flag is flipped through
// SiteIsolationStatsGatherer::SetEnabled(). It already has internal linkage by
// virtue of living in the unnamed namespace, so no `static` is needed.
bool g_stats_gathering_enabled = false;
27
// MIME types recognized by the cross-site document classifier, grouped by the
// canonical type each one maps to in GetCanonicalMimeType().

// HTML.
const char kTextHtml[] = "text/html";

// Plain text.
const char kTextPlain[] = "text/plain";

// JSON.
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";

// XML.
// NOTE(review): |xAppRssXml| does not follow the k-prefix convention used by
// its siblings; renaming it requires updating GetCanonicalMimeType() as well.
const char kTextXml[] = "text/xml";
const char xAppRssXml[] = "application/rss+xml";
const char kAppXml[] = "application/xml";
37
// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.
//
// Returns true if |status_code| is one of the HTTP status codes for which
// Chrome uses the response body as CSS/JavaScript. (For images, Chrome just
// ignores the status code.)
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
51
52 bool MatchesSignature(StringPiece data,
53 const StringPiece signatures[],
54 size_t arr_size) {
55 size_t offset = data.find_first_not_of(" \t\r\n");
56 // There is no not-whitespace character in this document.
57 if (offset == base::StringPiece::npos)
58 return false;
59
60 data.remove_prefix(offset);
61 size_t length = data.length();
62
63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
64 const StringPiece& signature = signatures[sig_index];
65 size_t signature_length = signature.length();
66 if (length < signature_length)
67 continue;
68
69 if (base::LowerCaseEqualsASCII(data.begin(),
70 data.begin() + signature_length,
71 signature.data()))
72 return true;
73 }
74 return false;
75 }
76
77 void IncrementHistogramCount(const std::string& name) {
78 // The default value of min, max, bucket_count are copied from histogram.h.
79 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
80 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
81 histogram_pointer->Add(1);
82 }
83
84 void IncrementHistogramEnum(const std::string& name,
85 uint32 sample,
86 uint32 boundary_value) {
87 // The default value of min, max, bucket_count are copied from histogram.h.
88 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
89 name, 1, boundary_value, boundary_value + 1,
90 base::HistogramBase::kUmaTargetedHistogramFlag);
91 histogram_pointer->Add(sample);
92 }
93
94 void HistogramCountBlockedResponse(
95 const std::string& bucket_prefix,
96 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
97 bool nosniff_block) {
98 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
99 IncrementHistogramCount(bucket_prefix + block_label);
100
101 // The content is blocked if it is sniffed as HTML/JSON/XML. When
102 // the blocked response is with an error status code, it is not
103 // disruptive for the following reasons : 1) the blocked content is
104 // not a binary object (such as an image) since it is sniffed as
105 // text; 2) then, this blocking only breaks the renderer behavior
106 // only if it is either JavaScript or CSS. However, the renderer
107 // doesn't use the contents of JS/CSS with unaffected status code
108 // (e.g, 404). 3) the renderer is expected not to use the cross-site
109 // document content for purposes other than JS/CSS (e.g, XHR).
110 bool renderable_status_code =
111 IsRenderableStatusCode(resp_data->http_status_code);
112
113 if (renderable_status_code) {
114 IncrementHistogramEnum(
115 bucket_prefix + block_label + ".RenderableStatusCode",
116 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);
117 } else {
118 IncrementHistogramCount(bucket_prefix + block_label +
119 ".NonRenderableStatusCode");
120 }
121 }
122
123 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
124 bool sniffed_as_js) {
125 IncrementHistogramCount(bucket_prefix + ".NotBlocked");
126 if (sniffed_as_js)
127 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
128 }
129
130 } // namespace
131
132 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
133 }
134
135 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {
136 g_stats_gathering_enabled = enabled;
137 }
138
139 linked_ptr<SiteIsolationResponseMetaData>
140 SiteIsolationStatsGatherer::OnReceivedResponse(
141 const GURL& frame_origin,
142 const GURL& response_url,
143 ResourceType resource_type,
144 int origin_pid,
145 const ResourceResponseInfo& info) {
146 if (!g_stats_gathering_enabled)
147 return linked_ptr<SiteIsolationResponseMetaData>();
148
149 // if |origin_pid| is non-zero, it means that this response is for a plugin
150 // spawned from this renderer process. We exclude responses for plugins for
151 // now, but eventually, we're going to make plugin processes directly talk to
152 // the browser process so that we don't apply cross-site document blocking to
153 // them.
154 if (origin_pid)
155 return linked_ptr<SiteIsolationResponseMetaData>();
156
157 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
158
159 // See if this is for navigation. If it is, don't block it, under the
160 // assumption that we will put it in an appropriate process.
161 if (IsResourceTypeFrame(resource_type))
162 return linked_ptr<SiteIsolationResponseMetaData>();
163
164 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))
165 return linked_ptr<SiteIsolationResponseMetaData>();
166
167 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))
168 return linked_ptr<SiteIsolationResponseMetaData>();
169
170 CrossSiteDocumentMimeType canonical_mime_type =
171 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);
172
173 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)
174 return linked_ptr<SiteIsolationResponseMetaData>();
175
176 // Every CORS request should have the Access-Control-Allow-Origin header even
177 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
178 // request, it has this header. response.httpHeaderField() internally uses
179 // case-insensitive matching for the header name.
180 std::string access_control_origin;
181
182 // We can use a case-insensitive header name for EnumerateHeader().
183 info.headers->EnumerateHeader(NULL, "access-control-allow-origin",
184 &access_control_origin);
185 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
186 frame_origin, response_url, access_control_origin))
187 return linked_ptr<SiteIsolationResponseMetaData>();
188
189 // Real XSD data collection starts from here.
190 std::string no_sniff;
191 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
192
193 linked_ptr<SiteIsolationResponseMetaData> resp_data(
194 new SiteIsolationResponseMetaData);
195 resp_data->frame_origin = frame_origin.spec();
196 resp_data->response_url = response_url;
197 resp_data->resource_type = resource_type;
198 resp_data->canonical_mime_type = canonical_mime_type;
199 resp_data->http_status_code = info.headers->response_code();
200 resp_data->no_sniff = base::LowerCaseEqualsASCII(no_sniff, "nosniff");
201
202 return resp_data;
203 }
204
205 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
206 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
207 const char* raw_data,
208 int raw_length) {
209 if (!g_stats_gathering_enabled)
210 return false;
211
212 DCHECK(resp_data.get());
213
214 StringPiece data(raw_data, raw_length);
215
216 // Record the length of the first received chunk of data to see if it's enough
217 // for sniffing.
218 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
219
220 // Record the number of cross-site document responses with a specific mime
221 // type (text/html, text/xml, etc).
222 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
223 resp_data->canonical_mime_type,
224 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);
225
226 // Store the result of cross-site document blocking analysis.
227 bool would_block = false;
228 bool sniffed_as_js = SniffForJS(data);
229
230 // Record the number of responses whose content is sniffed for what its mime
231 // type claims it to be. For example, we apply a HTML sniffer for a document
232 // tagged with text/html here. Whenever this check becomes true, we'll block
233 // the response.
234 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {
235 std::string bucket_prefix;
236 bool sniffed_as_target_document = false;
237 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {
238 bucket_prefix = "SiteIsolation.XSD.HTML";
239 sniffed_as_target_document =
240 CrossSiteDocumentClassifier::SniffForHTML(data);
241 } else if (resp_data->canonical_mime_type ==
242 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {
243 bucket_prefix = "SiteIsolation.XSD.XML";
244 sniffed_as_target_document =
245 CrossSiteDocumentClassifier::SniffForXML(data);
246 } else if (resp_data->canonical_mime_type ==
247 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {
248 bucket_prefix = "SiteIsolation.XSD.JSON";
249 sniffed_as_target_document =
250 CrossSiteDocumentClassifier::SniffForJSON(data);
251 } else {
252 NOTREACHED() << "Not a blockable mime type: "
253 << resp_data->canonical_mime_type;
254 }
255
256 if (sniffed_as_target_document) {
257 would_block = true;
258 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
259 } else {
260 if (resp_data->no_sniff) {
261 would_block = true;
262 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
263 } else {
264 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
265 }
266 }
267 } else {
268 // This block is for plain text documents. We apply our HTML, XML,
269 // and JSON sniffer to a text document in the order, and block it
270 // if any of them succeeds in sniffing.
271 std::string bucket_prefix;
272 if (CrossSiteDocumentClassifier::SniffForHTML(data))
273 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
274 else if (CrossSiteDocumentClassifier::SniffForXML(data))
275 bucket_prefix = "SiteIsolation.XSD.Plain.XML";
276 else if (CrossSiteDocumentClassifier::SniffForJSON(data))
277 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
278
279 if (bucket_prefix.size() > 0) {
280 would_block = true;
281 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
282 } else if (resp_data->no_sniff) {
283 would_block = true;
284 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
285 } else {
286 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
287 sniffed_as_js);
288 }
289 }
290
291 return would_block;
292 }
293
294 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(
295 const std::string& mime_type) {
296 if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) {
297 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;
298 }
299
300 if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) {
301 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;
302 }
303
304 if (base::LowerCaseEqualsASCII(mime_type, kAppJson) ||
305 base::LowerCaseEqualsASCII(mime_type, kTextJson) ||
306 base::LowerCaseEqualsASCII(mime_type, kTextXjson)) {
307 return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON;
308 }
309
310 if (base::LowerCaseEqualsASCII(mime_type, kTextXml) ||
311 base::LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
312 base::LowerCaseEqualsASCII(mime_type, kAppXml)) {
313 return CROSS_SITE_DOCUMENT_MIME_TYPE_XML;
314 }
315
316 return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS;
317 }
318
319 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {
320 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
321 // header which our policy depends on, so we cannot protect any
322 // document from FTP servers.
323 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
324 }
325
326 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,
327 const GURL& response_url) {
328 if (!frame_origin.is_valid() || !response_url.is_valid())
329 return false;
330
331 if (frame_origin.scheme() != response_url.scheme())
332 return false;
333
334 // SameDomainOrHost() extracts the effective domains (public suffix plus one)
335 // from the two URLs and compare them.
336 return net::registry_controlled_domains::SameDomainOrHost(
337 frame_origin, response_url,
338 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
339 }
340
341 // We don't use Webkit's existing CORS policy implementation since
342 // their policy works in terms of origins, not sites. For example,
343 // when frame is sub.a.com and it is not allowed to access a document
344 // with sub1.a.com. But under Site Isolation, it's allowed.
345 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
346 const GURL& frame_origin,
347 const GURL& website_origin,
348 const std::string& access_control_origin) {
349 // Many websites are sending back "\"*\"" instead of "*". This is
350 // non-standard practice, and not supported by Chrome. Refer to
351 // CrossOriginAccessControl::passesAccessControlCheck().
352
353 // TODO(dsjang): * is not allowed for the response from a request
354 // with cookies. This allows for more than what the renderer will
355 // eventually be able to receive, so we won't see illegal cross-site
356 // documents allowed by this. We have to find a way to see if this
357 // response is from a cookie-tagged request or not in the future.
358 if (access_control_origin == "*")
359 return true;
360
361 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for
362 // "*", but many websites are using just a domain for access_control_origin,
363 // and this is blocked by Webkit's CORS logic here :
364 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set
365 // is_valid() to false when it is created from a URL containing * in the
366 // domain part.
367
368 GURL cors_origin(access_control_origin);
369 return IsSameSite(frame_origin, cors_origin);
370 }
371
372 // This function is a slight modification of |net::SniffForHTML|.
373 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {
374 // The content sniffer used by Chrome and Firefox are using "<!--"
375 // as one of the HTML signatures, but it also appears in valid
376 // JavaScript, considered as well-formed JS by the browser. Since
377 // we do not want to block any JS, we exclude it from our HTML
378 // signatures. This can weaken our document block policy, but we can
379 // break less websites.
380 // TODO(dsjang): parameterize |net::SniffForHTML| with an option
381 // that decides whether to include <!-- or not, so that we can
382 // remove this function.
383 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
384 // process, we should do single-thread checking here for the static
385 // initializer.
386 static const StringPiece kHtmlSignatures[] = {
387 StringPiece("<!DOCTYPE html"), // HTML5 spec
388 StringPiece("<script"), // HTML5 spec, Mozilla
389 StringPiece("<html"), // HTML5 spec, Mozilla
390 StringPiece("<head"), // HTML5 spec, Mozilla
391 StringPiece("<iframe"), // Mozilla
392 StringPiece("<h1"), // Mozilla
393 StringPiece("<div"), // Mozilla
394 StringPiece("<font"), // Mozilla
395 StringPiece("<table"), // Mozilla
396 StringPiece("<a"), // Mozilla
397 StringPiece("<style"), // Mozilla
398 StringPiece("<title"), // Mozilla
399 StringPiece("<b"), // Mozilla
400 StringPiece("<body"), // Mozilla
401 StringPiece("<br"), // Mozilla
402 StringPiece("<p"), // Mozilla
403 StringPiece("<?xml") // Mozilla
404 };
405
406 while (data.length() > 0) {
407 if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures)))
408 return true;
409
410 // If we cannot find "<!--", we fail sniffing this as HTML.
411 static const StringPiece kCommentBegins[] = {StringPiece("<!--")};
412 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))
413 break;
414
415 // Search for --> and do SniffForHTML after that. If we can find the
416 // comment's end, we start HTML sniffing from there again.
417 static const char kEndComment[] = "-->";
418 size_t offset = data.find(kEndComment);
419 if (offset == base::StringPiece::npos)
420 break;
421
422 // Proceed to the index next to the ending comment (-->).
423 data.remove_prefix(offset + strlen(kEndComment));
424 }
425
426 return false;
427 }
428
429 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {
430 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for
431 // this signature. However, XML is case-sensitive. Don't we have to
432 // be more lenient only to block documents starting with the exact
433 // string <?xml rather than <?XML ?
434 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
435 // process, we should do single-thread checking here for the static
436 // initializer.
437 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
438 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
439 }
440
441 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {
442 // TODO(dsjang): We have to come up with a better way to sniff
443 // JSON. However, even RE cannot help us that much due to the fact
444 // that we don't do full parsing. This DFA starts with state 0, and
445 // finds {, "/' and : in that order. We're avoiding adding a
446 // dependency on a regular expression library.
447 enum {
448 kStartState,
449 kLeftBraceState,
450 kLeftQuoteState,
451 kColonState,
452 kTerminalState,
453 } state = kStartState;
454
455 size_t length = data.length();
456 for (size_t i = 0; i < length && state < kColonState; ++i) {
457 const char c = data[i];
458 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
459 continue;
460
461 switch (state) {
462 case kStartState:
463 if (c == '{')
464 state = kLeftBraceState;
465 else
466 state = kTerminalState;
467 break;
468 case kLeftBraceState:
469 if (c == '\"' || c == '\'')
470 state = kLeftQuoteState;
471 else
472 state = kTerminalState;
473 break;
474 case kLeftQuoteState:
475 if (c == ':')
476 state = kColonState;
477 break;
478 case kColonState:
479 case kTerminalState:
480 NOTREACHED();
481 break;
482 }
483 }
484 return state == kColonState;
485 }
486
487 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {
488 // The purpose of this function is to try to see if there's any possibility
489 // that this data can be JavaScript (superset of JS). Search for "var " for JS
490 // detection. This is a real hack and should only be used for stats gathering.
491 return data.find("var ") != base::StringPiece::npos;
492 }
493
494 } // namespace content
OLDNEW
« no previous file with comments | « content/child/site_isolation_policy.h ('k') | content/child/site_isolation_policy_browsertest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698