Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(234)

Side by Side Diff: content/child/site_isolation_stats_gatherer.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3
Patch Set: Pull Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/child/site_isolation_policy.h" 5 #include "content/child/site_isolation_stats_gatherer.h"
6 6
7 #include "base/basictypes.h"
8 #include "base/command_line.h"
9 #include "base/lazy_instance.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram.h" 7 #include "base/metrics/histogram.h"
8 #include "base/strings/string_piece.h"
12 #include "base/strings/string_util.h" 9 #include "base/strings/string_util.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/public/common/resource_response_info.h" 10 #include "content/public/common/resource_response_info.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/http/http_response_headers.h" 11 #include "net/http/http_response_headers.h"
17 12
18 using base::StringPiece;
19
20 namespace content { 13 namespace content {
21 14
22 namespace { 15 namespace {
23 16
24 // The gathering of UMA stats for site isolation is deactivated by default, and 17 // The gathering of UMA stats for site isolation is deactivated by default, and
25 // only activated in renderer processes. 18 // only activated in renderer processes.
26 static bool g_stats_gathering_enabled = false; 19 static bool g_stats_gathering_enabled = false;
27 20
28 // MIME types
29 const char kTextHtml[] = "text/html";
30 const char kTextXml[] = "text/xml";
31 const char xAppRssXml[] = "application/rss+xml";
32 const char kAppXml[] = "application/xml";
33 const char kAppJson[] = "application/json";
34 const char kTextJson[] = "text/json";
35 const char kTextXjson[] = "text/x-json";
36 const char kTextPlain[] = "text/plain";
37
38 // TODO(dsjang): This is only needed for collecting UMA stats. It will be
39 // deleted when this class is used for actual blocking.
40 bool IsRenderableStatusCode(int status_code) { 21 bool IsRenderableStatusCode(int status_code) {
41 // Chrome only uses the content of a response with one of these status codes 22 // Chrome only uses the content of a response with one of these status codes
42 // for CSS/JavaScript. For images, Chrome just ignores status code. 23 // for CSS/JavaScript. For images, Chrome just ignores status code.
43 const int renderable_status_code[] = { 24 const int renderable_status_code[] = {
44 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307}; 25 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};
45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { 26 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
46 if (renderable_status_code[i] == status_code) 27 if (renderable_status_code[i] == status_code)
47 return true; 28 return true;
48 } 29 }
49 return false; 30 return false;
50 } 31 }
51 32
52 bool MatchesSignature(StringPiece data,
53 const StringPiece signatures[],
54 size_t arr_size) {
55 size_t offset = data.find_first_not_of(" \t\r\n");
56 // There is no non-whitespace character in this document.
57 if (offset == base::StringPiece::npos)
58 return false;
59
60 data.remove_prefix(offset);
61 size_t length = data.length();
62
63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
64 const StringPiece& signature = signatures[sig_index];
65 size_t signature_length = signature.length();
66 if (length < signature_length)
67 continue;
68
69 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length,
70 signature.data()))
71 return true;
72 }
73 return false;
74 }
75
76 void IncrementHistogramCount(const std::string& name) { 33 void IncrementHistogramCount(const std::string& name) {
77 // The default values of min, max, and bucket_count are copied from histogram.h. 34 // The default values of min, max, and bucket_count are copied from histogram.h.
78 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( 35 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
79 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); 36 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
80 histogram_pointer->Add(1); 37 histogram_pointer->Add(1);
81 } 38 }
82 39
83 void IncrementHistogramEnum(const std::string& name, 40 void IncrementHistogramEnum(const std::string& name,
84 uint32 sample, 41 uint32 sample,
85 uint32 boundary_value) { 42 uint32 boundary_value) {
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 160
204 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk( 161 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
205 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, 162 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
206 const char* raw_data, 163 const char* raw_data,
207 int raw_length) { 164 int raw_length) {
208 if (!g_stats_gathering_enabled) 165 if (!g_stats_gathering_enabled)
209 return false; 166 return false;
210 167
211 DCHECK(resp_data.get()); 168 DCHECK(resp_data.get());
212 169
213 StringPiece data(raw_data, raw_length); 170 base::StringPiece data(raw_data, raw_length);
214 171
215 // Record the length of the first received chunk of data to see if it's enough 172 // Record the length of the first received chunk of data to see if it's enough
216 // for sniffing. 173 // for sniffing.
217 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); 174 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
218 175
219 // Record the number of cross-site document responses with a specific mime 176 // Record the number of cross-site document responses with a specific mime
220 // type (text/html, text/xml, etc). 177 // type (text/html, text/xml, etc).
221 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", 178 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
222 resp_data->canonical_mime_type, 179 resp_data->canonical_mime_type,
223 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX); 180 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
283 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); 240 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
284 } else { 241 } else {
285 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", 242 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
286 sniffed_as_js); 243 sniffed_as_js);
287 } 244 }
288 } 245 }
289 246
290 return would_block; 247 return would_block;
291 } 248 }
292 249
293 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( 250 bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) {
294 const std::string& mime_type) {
295 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
296 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;
297 }
298
299 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
300 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;
301 }
302
303 if (LowerCaseEqualsASCII(mime_type, kAppJson) ||
304 LowerCaseEqualsASCII(mime_type, kTextJson) ||
305 LowerCaseEqualsASCII(mime_type, kTextXjson)) {
306 return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON;
307 }
308
309 if (LowerCaseEqualsASCII(mime_type, kTextXml) ||
310 LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
311 LowerCaseEqualsASCII(mime_type, kAppXml)) {
312 return CROSS_SITE_DOCUMENT_MIME_TYPE_XML;
313 }
314
315 return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS;
316 }
317
318 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {
319 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
320 // header which our policy depends on, so we cannot protect any
321 // document from FTP servers.
322 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
323 }
324
325 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,
326 const GURL& response_url) {
327 if (!frame_origin.is_valid() || !response_url.is_valid())
328 return false;
329
330 if (frame_origin.scheme() != response_url.scheme())
331 return false;
332
333 // SameDomainOrHost() extracts the effective domains (public suffix plus one)
334 // from the two URLs and compares them.
335 return net::registry_controlled_domains::SameDomainOrHost(
336 frame_origin, response_url,
337 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
338 }
339
340 // We don't use WebKit's existing CORS policy implementation since
341 // their policy works in terms of origins, not sites. For example,
342 // under CORS a frame at sub.a.com is not allowed to access a document
343 // from sub1.a.com, but under Site Isolation it is allowed.
344 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
345 const GURL& frame_origin,
346 const GURL& website_origin,
347 const std::string& access_control_origin) {
348 // Many websites are sending back "\"*\"" instead of "*". This is
349 // non-standard practice, and not supported by Chrome. Refer to
350 // CrossOriginAccessControl::passesAccessControlCheck().
351
352 // TODO(dsjang): * is not allowed for the response from a request
353 // with cookies. This allows for more than what the renderer will
354 // eventually be able to receive, so we won't see illegal cross-site
355 // documents allowed by this. We have to find a way to see if this
356 // response is from a cookie-tagged request or not in the future.
357 if (access_control_origin == "*")
358 return true;
359
360 // TODO(dsjang): The CORS spec only allows a fully specified URL, except for
361 // "*", but many websites are using just a domain for access_control_origin,
362 // and this is blocked by WebKit's CORS logic here:
363 // CrossOriginAccessControl::passesAccessControlCheck(). A GURL reports
364 // is_valid() as false when it is created from a URL containing * in the
365 // domain part.
366
367 GURL cors_origin(access_control_origin);
368 return IsSameSite(frame_origin, cors_origin);
369 }
370
371 // This function is a slight modification of |net::SniffForHTML|.
372 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {
373 // The content sniffers used by Chrome and Firefox treat "<!--"
374 // as one of the HTML signatures, but it also appears in valid
375 // JavaScript that the browser considers well-formed JS. Since
376 // we do not want to block any JS, we exclude it from our HTML
377 // signatures. This can weaken our document blocking policy, but it
378 // breaks fewer websites.
379 // TODO(dsjang): parameterize |net::SniffForHTML| with an option
380 // that decides whether to include <!-- or not, so that we can
381 // remove this function.
382 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
383 // process, we should do single-thread checking here for the static
384 // initializer.
385 static const StringPiece kHtmlSignatures[] = {
386 StringPiece("<!DOCTYPE html"), // HTML5 spec
387 StringPiece("<script"), // HTML5 spec, Mozilla
388 StringPiece("<html"), // HTML5 spec, Mozilla
389 StringPiece("<head"), // HTML5 spec, Mozilla
390 StringPiece("<iframe"), // Mozilla
391 StringPiece("<h1"), // Mozilla
392 StringPiece("<div"), // Mozilla
393 StringPiece("<font"), // Mozilla
394 StringPiece("<table"), // Mozilla
395 StringPiece("<a"), // Mozilla
396 StringPiece("<style"), // Mozilla
397 StringPiece("<title"), // Mozilla
398 StringPiece("<b"), // Mozilla
399 StringPiece("<body"), // Mozilla
400 StringPiece("<br"), // Mozilla
401 StringPiece("<p"), // Mozilla
402 StringPiece("<?xml") // Mozilla
403 };
404
405 while (data.length() > 0) {
406 if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures)))
407 return true;
408
409 // If we cannot find "<!--", we fail sniffing this as HTML.
410 static const StringPiece kCommentBegins[] = {StringPiece("<!--")};
411 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))
412 break;
413
414 // Search for --> and do SniffForHTML after that. If we can find the
415 // comment's end, we start HTML sniffing from there again.
416 static const char kEndComment[] = "-->";
417 size_t offset = data.find(kEndComment);
418 if (offset == base::StringPiece::npos)
419 break;
420
421 // Proceed to the index next to the ending comment (-->).
422 data.remove_prefix(offset + strlen(kEndComment));
423 }
424
425 return false;
426 }
427
428 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {
429 // TODO(dsjang): Chrome's mime_sniffer uses strncasecmp() for
430 // this signature. However, XML is case-sensitive. Shouldn't we
431 // be more lenient and block only documents starting with the exact
432 // string <?xml rather than <?XML?
433 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
434 // process, we should do single-thread checking here for the static
435 // initializer.
436 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
437 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
438 }
439
440 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {
441 // TODO(dsjang): We have to come up with a better way to sniff
442 // JSON. However, even RE cannot help us that much due to the fact
443 // that we don't do full parsing. This DFA starts with state 0, and
444 // finds {, "/' and : in that order. We're avoiding adding a
445 // dependency on a regular expression library.
446 enum {
447 kStartState,
448 kLeftBraceState,
449 kLeftQuoteState,
450 kColonState,
451 kTerminalState,
452 } state = kStartState;
453
454 size_t length = data.length();
455 for (size_t i = 0; i < length && state < kColonState; ++i) {
456 const char c = data[i];
457 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
458 continue;
459
460 switch (state) {
461 case kStartState:
462 if (c == '{')
463 state = kLeftBraceState;
464 else
465 state = kTerminalState;
466 break;
467 case kLeftBraceState:
468 if (c == '\"' || c == '\'')
469 state = kLeftQuoteState;
470 else
471 state = kTerminalState;
472 break;
473 case kLeftQuoteState:
474 if (c == ':')
475 state = kColonState;
476 break;
477 case kColonState:
478 case kTerminalState:
479 NOTREACHED();
480 break;
481 }
482 }
483 return state == kColonState;
484 }
485
486 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {
487 // The purpose of this function is to try to see if there's any possibility 251 // The purpose of this function is to try to see if there's any possibility
488 // that this data can be JavaScript (superset of JS). Search for "var " for JS 252 // that this data can be JavaScript (superset of JS). Search for "var " for JS
489 // detection. This is a real hack and should only be used for stats gathering. 253 // detection. This is a real hack and should only be used for stats gathering.
490 return data.find("var ") != base::StringPiece::npos; 254 return data.find("var ") != base::StringPiece::npos;
491 } 255 }
492 256
493 } // namespace content 257 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698