Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: content/child/site_isolation_policy.cc

Issue 22254005: UMA data collector for cross-site documents(XSD) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr
Patch Set: ResourceDispatcher uses SiteIsolationPolicy as the outermost Peer Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/child/site_isolation_policy.h"
6
7 #include "base/basictypes.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h"
11 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
12 #include "net/http/http_response_headers.h"
13 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
14 #include "third_party/WebKit/public/platform/WebString.h"
15 #include "third_party/WebKit/public/platform/WebURL.h"
16 #include "third_party/WebKit/public/platform/WebURLRequest.h"
17 #include "third_party/WebKit/public/platform/WebURLResponse.h"
18 #include "third_party/WebKit/public/web/WebDocument.h"
19 #include "third_party/WebKit/public/web/WebFrame.h"
20 #include "third_party/WebKit/public/web/WebFrameClient.h"
21 #include "third_party/WebKit/public/web/WebSecurityOrigin.h"
22 #include "third_party/WebKit/public/web/WebView.h"
23
24 using WebKit::WebDocument;
25 using WebKit::WebString;
26 using WebKit::WebURL;
27 using WebKit::WebURLResponse;
28 using WebKit::WebURLRequest;
29
30 namespace content {
31
namespace {

// Content-Type strings that GetCanonicalMimeType() compares response MIME
// types against, grouped by the document family each one maps to.

// HTML documents.
const char kTextHtml[] = "text/html";

// XML documents.
const char kTextXml[] = "text/xml";
const char kAppXml[] = "application/xml";
const char xAppRssXml[] = "application/rss+xml";

// JSON documents.
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";

// Plain text, which is sniffed for each of the families above.
const char kTextPlain[] = "text/plain";

}  // namespace
45
46 SiteIsolationPolicy::SiteIsolationPolicy(
47 webkit_glue::ResourceLoaderBridge::Peer* original_peer,
48 bool policy_enforced,
49 GURL& frame_origin,
50 GURL& request_url,
51 int request_id,
52 ResourceType::Type resource_type)
53 : original_peer_(original_peer),
54 policy_enforced_(policy_enforced),
55 frame_origin_(frame_origin),
56 request_url_(request_url),
57 request_id_(request_id),
58 resource_type_(resource_type),
59 state_(INIT),
60 cross_site_document_header_(false),
61 confirmed_safe_(false) {
62 // TODO(dsjang): when SiteIsoloation is fully deployed in the browser process,
63 // |frame_origin| will be given from a trusted module.
64 }
65
66 void SiteIsolationPolicy::OnUploadProgress(uint64 position, uint64 size) {
67 original_peer_->OnUploadProgress(position, size);
68 }
69
70 void SiteIsolationPolicy::OnDownloadedData(int len) {
71 return original_peer_->OnDownloadedData(len);
72 }
73
74 void SiteIsolationPolicy::OnReceivedCachedMetadata(const char* data, int len) {
75 return original_peer_->OnReceivedCachedMetadata(data, len);
76 }
77
78 void SiteIsolationPolicy::OnCompletedRequest(
79 int error_code,
80 bool was_ignored_by_handler,
81 const std::string& security_info,
82 const base::TimeTicks& completion_time) {
83 state_ = COMPLETED;
84 original_peer_->OnCompletedRequest(
85 error_code, was_ignored_by_handler, security_info, completion_time);
86 }
87
88 bool SiteIsolationPolicy::OnReceivedRedirect(
89 const GURL& new_url,
90 const webkit_glue::ResourceResponseInfo& info,
91 bool* has_new_first_party_for_cookies,
92 GURL* new_first_party_for_cookies) {
93 DCHECK_EQ(state_, INIT);
94 request_url_ = new_url;
95 return original_peer_->OnReceivedRedirect(new_url,
96 info,
97 has_new_first_party_for_cookies,
98 new_first_party_for_cookies);
99 }
100
101 void SiteIsolationPolicy::OnReceivedResponse(
102 const webkit_glue::ResourceResponseInfo& info) {
103 DCHECK_EQ(state_, INIT);
104 state_ = RESPONSE_RECEIVED;
105
106 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
107 original_peer_->OnReceivedResponse(info);
108
109 if (!policy_enforced_)
110 return;
111
112 // See if this is for navigation. If it is, don't block it, under the
113 // assumption that we will put it in an appropriate process.
114 if (ResourceType::IsFrame(resource_type_))
115 return;
116
117 if (!IsBlockableScheme(request_url_))
118 return;
119
120 if (IsSameSite(frame_origin_, request_url_))
121 return;
122
123 SiteIsolationPolicy::CanonicalMimeType canonical_mime_type =
124 GetCanonicalMimeType(info.mime_type);
125
126 if (canonical_mime_type == SiteIsolationPolicy::Others)
127 return;
128
129 // Every CORS request should have the Access-Control-Allow-Origin header even
130 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
131 // request, it has this header. response.httpHeaderField() internally uses
132 // case-insensitive matching for the header name.
133 std::string access_control_origin;
134
135 // We can use a case-insensitive header name for EnumerateHeader().
136 info.headers->EnumerateHeader(
137 NULL, "access-control-allow-origin", &access_control_origin);
138 if (IsValidCorsHeaderSet(frame_origin_, request_url_, access_control_origin))
139 return;
140
141 // Real XSD data collection starts from here.
142 std::string no_sniff;
143 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
144
145 canonical_mime_type_ = canonical_mime_type;
146 http_status_code_ = info.headers->response_code();
147 no_sniff_ = LowerCaseEqualsASCII(no_sniff, "nosniff");
148
149 cross_site_document_header_ = true;
150 }
151
// Helper macros for the UMA bookkeeping in OnReceivedData(). The same
// recording pattern is repeated for every MIME type, but it cannot be moved
// into a helper function because UMA_HISTOGRAM_* macros do not accept
// variables as their bucket names; macros capture the repeated pattern
// instead and keep the code size down.
//
// The macros read the following names from the scope they are expanded in:
//   renderable_status_code  (local bool)
//   is_sniffed_for_js       (local bool)
//   resource_type_          (member)
//   no_sniff_               (member)
//
// Each macro expands to a single statement (do { ... } while (0)), so it is
// safe inside an unbraced if/else and must be followed by a semicolon.
//
// TODO(dsjang): this is only needed for collecting UMA stats. Will be deleted
// when this class is used for actual blocking.

// Counts a response whose content matched the sniffer for its MIME type.
#define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".Blocked", 1); \
    if (renderable_status_code) { \
      UMA_HISTOGRAM_ENUMERATION( \
          BUCKET_PREFIX ".Blocked.RenderableStatusCode", \
          resource_type_, \
          ResourceType::LAST_TYPE + 1); \
    } else { \
      UMA_HISTOGRAM_COUNTS( \
          BUCKET_PREFIX ".Blocked.NonRenderableStatusCode", 1); \
    } \
  } while (0)

// Counts a response that did not match the sniffer but carries
// "X-Content-Type-Options: nosniff".
#define SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NoSniffBlocked", 1); \
    if (renderable_status_code) { \
      UMA_HISTOGRAM_ENUMERATION( \
          BUCKET_PREFIX ".NoSniffBlocked.RenderableStatusCode", \
          resource_type_, \
          ResourceType::LAST_TYPE + 1); \
    } else { \
      UMA_HISTOGRAM_ENUMERATION( \
          BUCKET_PREFIX ".NoSniffBlocked.NonRenderableStatusCode", \
          resource_type_, \
          ResourceType::LAST_TYPE + 1); \
    } \
  } while (0)

// Counts a response that is let through, and separately those that look like
// they might be JavaScript.
#define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NotBlocked", 1); \
    if (is_sniffed_for_js) { \
      UMA_HISTOGRAM_COUNTS(BUCKET_PREFIX ".NotBlocked.MaybeJS", 1); \
    } \
  } while (0)

// Evaluates SNIFF_EXPR once and records the matching outcome: blocked,
// blocked because of nosniff, or not blocked.
#define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR, BUCKET_PREFIX) \
  do { \
    if (SNIFF_EXPR) { \
      SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX); \
    } else if (no_sniff_) { \
      SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(BUCKET_PREFIX); \
    } else { \
      SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX); \
    } \
  } while (0)
200
201 void SiteIsolationPolicy::OnReceivedData(const char* data,
202 int length,
203 int encoded_data_length) {
204 DCHECK(state_ == RESPONSE_RECEIVED || state_ == DATA_RECEIVED);
205 if (!policy_enforced_) {
206 original_peer_->OnReceivedData(data, length, encoded_data_length);
207 return;
208 }
209
210 // The first packet has already been examined.
211 if (state_ == DATA_RECEIVED) {
212 if (!cross_site_document_header_ || confirmed_safe_)
213 original_peer_->OnReceivedData(data, length, encoded_data_length);
214 return;
215 }
216
217 state_ = DATA_RECEIVED;
218
219 // TODO(dsjang): we do not block any response data now. If this is set to
220 // false by any sniffing logic below, it will block all the following response
221 // data to this request.
222 confirmed_safe_ = true;
223
224 if (cross_site_document_header_) {
225 // Record the length of the first received network packet to see if it's
226 // enough for sniffing.
227 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length);
228
229 // Record the number of cross-site document responses with a specific mime
230 // type (text/html, text/xml, etc).
231 UMA_HISTOGRAM_ENUMERATION(
232 "SiteIsolation.XSD.MimeType",
233 canonical_mime_type_,
234 MaxCanonicalMimeType);
235
236 // The content is blocked if it is sniffed for HTML/JSON/XML. When the
237 // blocked response is with an error status code, it is not disruptive by
238 // the following reasons : 1) the blocked content is not a binary object
239 // (such as an image) since it is sniffed for text; 2) then, this blocking
240 // only breaks the renderer behavior only if it is either JavaScript or
241 // CSS. However, the renderer doesn't use the contents of JS/CSS with
242 // unaffected status code (e.g, 404). 3) the renderer is expected not to use
243 // the cross-site document content for purposes other than JS/CSS (e.g,
244 // XHR).
245 bool renderable_status_code =
246 IsRenderableStatusCodeForDocument(http_status_code_);
247
248 // This is only used for false-negative analysis for non-blocked resources.
249 bool is_sniffed_for_js = SniffForJS(data, length);
250
251 // Record the number of responses whose content is sniffed for what its mime
252 // type claims it to be. For example, we apply a HTML sniffer for a document
253 // tagged with text/html here. Whenever this check becomes true, we'll block
254 // the response.
255 switch (canonical_mime_type_) {
256 case SiteIsolationPolicy::HTML:
257 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length),
258 "SiteIsolation.XSD.HTML");
259 break;
260 case SiteIsolationPolicy::XML:
261 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length),
262 "SiteIsolation.XSD.XML");
263 break;
264 case SiteIsolationPolicy::JSON:
265 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length),
266 "SiteIsolation.XSD.JSON");
267 break;
268 case SiteIsolationPolicy::Plain:
269 if (SniffForHTML(data, length)) {
270 SITE_ISOLATION_POLICY_COUNT_BLOCK("SiteIsolation.XSD.Plain.HTML");
271 } else if (SniffForXML(data, length)) {
272 SITE_ISOLATION_POLICY_COUNT_BLOCK("SiteIsolation.XSD.Plain.XML");
273 } else if (SniffForJSON(data, length)) {
274 SITE_ISOLATION_POLICY_COUNT_BLOCK("SiteIsolation.XSD.Plain.JSON");
275 } else if (is_sniffed_for_js) {
276 if (no_sniff_) {
277 SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK(
278 "SiteIsolation.XSD.Plain");
279 } else {
280 SITE_ISOLATION_POLICY_COUNT_NOTBLOCK("SiteIsolation.XSD.Plain");
281 }
282 }
283 break;
284 default:
285 NOTREACHED() << "Not a blockable mime type. This mime type shouldn't "
286 "reach here.";
287 break;
288 }
289 original_peer_->OnReceivedData(data, length, encoded_data_length);
darin (slow to review) 2013/08/20 23:47:05 nit: having the same code in both branches indicat
dsjang 2013/08/21 18:49:57 I'm switching back to the older version of SiteIso
290 } else
291 original_peer_->OnReceivedData(data, length, encoded_data_length);
292 }
293
// The UMA helper macros are only meant for OnReceivedData(); undefine all
// four of them so that none leaks into the rest of the translation unit.
#undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT
#undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK
#undef SITE_ISOLATION_POLICY_COUNT_NO_SNIFF_BLOCK
#undef SITE_ISOLATION_POLICY_COUNT_BLOCK
297
298 SiteIsolationPolicy::CanonicalMimeType
299 SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) {
300 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
301 return SiteIsolationPolicy::HTML;
302 }
303
304 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
305 return SiteIsolationPolicy::Plain;
306 }
307
308 if (LowerCaseEqualsASCII(mime_type, kAppJson) ||
309 LowerCaseEqualsASCII(mime_type, kTextJson) ||
310 LowerCaseEqualsASCII(mime_type, kTextXjson)) {
311 return SiteIsolationPolicy::JSON;
312 }
313
314 if (LowerCaseEqualsASCII(mime_type, kTextXml) ||
315 LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
316 LowerCaseEqualsASCII(mime_type, kAppXml)) {
317 return SiteIsolationPolicy::XML;
318 }
319
320 return SiteIsolationPolicy::Others;
321
322 }
323
324 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) {
325 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
326 // header which our policy depends on, so we cannot protect any
327 // document from FTP servers.
328 return url.SchemeIs("http") || url.SchemeIs("https");
329 }
330
331 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin,
332 const GURL& response_url) {
333 if (!frame_origin.is_valid() || !response_url.is_valid())
334 return false;
335
336 if (frame_origin.scheme() != response_url.scheme())
337 return false;
338
339 // SameDomainOrHost() extracts the effective domains (public suffix plus one)
340 // from the two URLs and compare them.
341 // TODO(dsjang): use INCLUDE_PRIVATE_REGISTRIES when http://crbug.com/7988 is
342 // fixed.
343 return net::registry_controlled_domains::SameDomainOrHost(
344 frame_origin,
345 response_url,
346 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
347 }
348
349 // We don't use Webkit's existing CORS policy implementation since
350 // their policy works in terms of origins, not sites. For example,
351 // when frame is sub.a.com and it is not allowed to access a document
352 // with sub1.a.com. But under Site Isolation, it's allowed.
353 bool SiteIsolationPolicy::IsValidCorsHeaderSet(
354 GURL& frame_origin,
355 GURL& website_origin,
356 std::string access_control_origin) {
357 // Many websites are sending back "\"*\"" instead of "*". This is
358 // non-standard practice, and not supported by Chrome. Refer to
359 // CrossOriginAccessControl::passesAccessControlCheck().
360
361 // TODO(dsjang): * is not allowed for the response from a request
362 // with cookies. This allows for more than what the renderer will
363 // eventually be able to receive, so we won't see illegal cross-site
364 // documents allowed by this. We have to find a way to see if this
365 // response is from a cookie-tagged request or not in the future.
366 if (access_control_origin == "*")
367 return true;
368
369 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for
370 // "*", but many websites are using just a domain for access_control_origin,
371 // and this is blocked by Webkit's CORS logic here :
372 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set
373 // is_valid() to false when it is created from a URL containing * in the
374 // domain part.
375
376 GURL cors_origin(access_control_origin);
377 return IsSameSite(frame_origin, cors_origin);
378 }
379
380 // This function is a slight modification of |net::SniffForHTML|.
381 bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) {
382 // The content sniffer used by Chrome and Firefox are using "<!--"
383 // as one of the HTML signatures, but it also appears in valid
384 // JavaScript, considered as well-formed JS by the browser. Since
385 // we do not want to block any JS, we exclude it from our HTML
386 // signatures. This can weaken our document block policy, but we can
387 // break less websites.
388 // TODO(dsjang): parameterize |net::SniffForHTML| with an option
389 // that decides whether to include <!-- or not, so that we can
390 // remove this function.
391 const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec
392 "<script", // HTML5 spec, Mozilla
393 "<html", // HTML5 spec, Mozilla
394 "<head", // HTML5 spec, Mozilla
395 "<iframe", // Mozilla
396 "<h1", // Mozilla
397 "<div", // Mozilla
398 "<font", // Mozilla
399 "<table", // Mozilla
400 "<a", // Mozilla
401 "<style", // Mozilla
402 "<title", // Mozilla
403 "<b", // Mozilla
404 "<body", // Mozilla
405 "<br", "<p" // Mozilla
406 };
407
408 if (MatchesSignature(
409 data, length, html_signatures, arraysize(html_signatures)))
410 return true;
411
412 // "<!--" is specially treated since web JS can use "<!--" "-->" pair for
413 // comments.
414 const char* comment_begins[] = {"<!--" };
415
416 if (MatchesSignature(
417 data, length, comment_begins, arraysize(comment_begins))) {
418 // Search for --> and do SniffForHTML after that. If we can find the
419 // comment's end, we start HTML sniffing from there again.
420 const char end_comment[] = "-->";
421 const size_t end_comment_size = strlen(end_comment);
422
423 for (size_t i = 0; i <= length - end_comment_size; ++i) {
424 if (!strncmp(data + i, end_comment, end_comment_size)) {
425 size_t skipped = i + end_comment_size;
426 return SniffForHTML(data + skipped, length - skipped);
427 }
428 }
429 }
430
431 return false;
432 }
433
434 bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) {
435 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for
436 // this signature. However, XML is case-sensitive. Don't we have to
437 // be more lenient only to block documents starting with the exact
438 // string <?xml rather than <?XML ?
439 const char* xml_signatures[] = {"<?xml" // Mozilla
440 };
441 return MatchesSignature(
442 data, length, xml_signatures, arraysize(xml_signatures));
443 }
444
445 bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) {
446 // TODO(dsjang): We have to come up with a better way to sniff
447 // JSON. However, even RE cannot help us that much due to the fact
448 // that we don't do full parsing. This DFA starts with state 0, and
449 // finds {, "/' and : in that order. We're avoiding adding a
450 // dependency on a regular expression library.
451 const int kInitState = 0;
452 const int kLeftBraceState = 1;
453 const int kLeftQuoteState = 2;
454 const int kColonState = 3;
455 const int kDeadState = 4;
456
457 int state = kInitState;
458 for (size_t i = 0; i < length && state < kColonState; ++i) {
459 const char c = data[i];
460 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
461 continue;
462
463 switch (state) {
464 case kInitState:
465 if (c == '{')
466 state = kLeftBraceState;
467 else
468 state = kDeadState;
469 break;
470 case kLeftBraceState:
471 if (c == '\"' || c == '\'')
472 state = kLeftQuoteState;
473 else
474 state = kDeadState;
475 break;
476 case kLeftQuoteState:
477 if (c == ':')
478 state = kColonState;
479 break;
480 default:
481 NOTREACHED();
482 break;
483 }
484 }
485 return state == kColonState;
486 }
487
488 bool SiteIsolationPolicy::MatchesSignature(const char* raw_data,
489 size_t raw_length,
490 const char* signatures[],
491 size_t arr_size) {
492 size_t start = 0;
493 // Skip white characters at the beginning of the document.
494 for (start = 0; start < raw_length; ++start) {
495 char c = raw_data[start];
496 if (!(c == ' ' || c == '\t' || c == '\r' || c == '\n'))
497 break;
498 }
499
500 // There is no not-whitespace character in this document.
501 if (!(start < raw_length))
502 return false;
503
504 const char* data = raw_data + start;
505 size_t length = raw_length - start;
506
507 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
508 const char* signature = signatures[sig_index];
509 size_t signature_length = strlen(signature);
510
511 if (length < signature_length)
512 continue;
513
514 if (!base::strncasecmp(signature, data, signature_length))
515 return true;
516 }
517 return false;
518 }
519
520 bool SiteIsolationPolicy::IsRenderableStatusCodeForDocument(int status_code) {
521 // Chrome only uses the content of a response with one of these status codes
522 // for CSS/JavaScript. For images, Chrome just ignores status code.
523 const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302,
524 303, 305, 306, 307};
525 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
526 if (renderable_status_code[i] == status_code)
527 return true;
528 }
529 return false;
530 }
531
532 bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) {
533 // TODO(dsjang): This is a real hack. The only purpose of this function is to
534 // try to see if there's any possibility that this data can be JavaScript
535 // (superset of JS). This function will be removed once UMA stats are
536 // gathered.
537
538 // Search for "var " for JS detection.
539 for (size_t i = 0; i < length - 3; ++i) {
540 if (strncmp(data + i, "var ", 4) == 0)
541 return true;
542 }
543 return false;
544 }
545
546 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698