content/child/site_isolation_stats_gatherer.cc - Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files

Side by Side Diff: content/child/site_isolation_stats_gatherer.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3

Patch Set: Pull Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « content/child/site_isolation_stats_gatherer.h ('k') | content/child/site_isolation_stats_gatherer_browsertest.cc » ('j') | content/child/site_isolation_stats_gatherer_browsertest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/child/site_isolation_policy.h"	5 #include "content/child/site_isolation_stats_gatherer.h"

6	6

7 #include "base/basictypes.h"

8 #include "base/command_line.h"

9 #include "base/lazy_instance.h"

10 #include "base/logging.h"

11 #include "base/metrics/histogram.h"	7 #include "base/metrics/histogram.h"

	8 #include "base/strings/string_piece.h"

12 #include "base/strings/string_util.h"	9 #include "base/strings/string_util.h"

13 #include "content/public/common/content_switches.h"

14 #include "content/public/common/resource_response_info.h"	10 #include "content/public/common/resource_response_info.h"

15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"

16 #include "net/http/http_response_headers.h"	11 #include "net/http/http_response_headers.h"

17	12

18 using base::StringPiece;

19

20 namespace content {	13 namespace content {

21	14

22 namespace {	15 namespace {

23	16

24 // The gathering of UMA stats for site isolation is deactivated by default, and	17 // The gathering of UMA stats for site isolation is deactivated by default, and

25 // only activated in renderer processes.	18 // only activated in renderer processes.

26 static bool g_stats_gathering_enabled = false;	19 static bool g_stats_gathering_enabled = false;

27	20

28 // MIME types

29 const char kTextHtml[] = "text/html";

30 const char kTextXml[] = "text/xml";

31 const char xAppRssXml[] = "application/rss+xml";

32 const char kAppXml[] = "application/xml";

33 const char kAppJson[] = "application/json";

34 const char kTextJson[] = "text/json";

35 const char kTextXjson[] = "text/x-json";

36 const char kTextPlain[] = "text/plain";

37

38 // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted

39 // when this class is used for actual blocking.

40 bool IsRenderableStatusCode(int status_code) {	21 bool IsRenderableStatusCode(int status_code) {

41 // Chrome only uses the content of a response with one of these status codes	22 // Chrome only uses the content of a response with one of these status codes

42 // for CSS/JavaScript. For images, Chrome just ignores status code.	23 // for CSS/JavaScript. For images, Chrome just ignores status code.

43 const int renderable_status_code[] = {	24 const int renderable_status_code[] = {

44 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};	25 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};

45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {	26 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {

46 if (renderable_status_code[i] == status_code)	27 if (renderable_status_code[i] == status_code)

47 return true;	28 return true;

48 }	29 }

49 return false;	30 return false;

50 }	31 }

51	32

52 bool MatchesSignature(StringPiece data,

53 const StringPiece signatures[],

54 size_t arr_size) {

55 size_t offset = data.find_first_not_of(" \t\r\n");

56 // There is no not-whitespace character in this document.

57 if (offset == base::StringPiece::npos)

58 return false;

59

60 data.remove_prefix(offset);

61 size_t length = data.length();

62

63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {

64 const StringPiece& signature = signatures[sig_index];

65 size_t signature_length = signature.length();

66 if (length < signature_length)

67 continue;

68

69 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length,

70 signature.data()))

71 return true;

72 }

73 return false;

74 }

75

76 void IncrementHistogramCount(const std::string& name) {	33 void IncrementHistogramCount(const std::string& name) {

77 // The default value of min, max, bucket_count are copied from histogram.h.	34 // The default value of min, max, bucket_count are copied from histogram.h.

78 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(	35 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(

79 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);	36 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);

80 histogram_pointer->Add(1);	37 histogram_pointer->Add(1);

81 }	38 }

82	39

83 void IncrementHistogramEnum(const std::string& name,	40 void IncrementHistogramEnum(const std::string& name,

84 uint32 sample,	41 uint32 sample,

85 uint32 boundary_value) {	42 uint32 boundary_value) {

(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
203	160

204 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(	161 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(

205 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,	162 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,

206 const char* raw_data,	163 const char* raw_data,

207 int raw_length) {	164 int raw_length) {

208 if (!g_stats_gathering_enabled)	165 if (!g_stats_gathering_enabled)

209 return false;	166 return false;

210	167

211 DCHECK(resp_data.get());	168 DCHECK(resp_data.get());

212	169

213 StringPiece data(raw_data, raw_length);	170 base::StringPiece data(raw_data, raw_length);

214	171

215 // Record the length of the first received chunk of data to see if it's enough	172 // Record the length of the first received chunk of data to see if it's enough

216 // for sniffing.	173 // for sniffing.

217 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);	174 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);

218	175

219 // Record the number of cross-site document responses with a specific mime	176 // Record the number of cross-site document responses with a specific mime

220 // type (text/html, text/xml, etc).	177 // type (text/html, text/xml, etc).

221 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",	178 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",

222 resp_data->canonical_mime_type,	179 resp_data->canonical_mime_type,

223 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);	180 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);

(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
283 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);	240 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);

284 } else {	241 } else {

285 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",	242 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",

286 sniffed_as_js);	243 sniffed_as_js);

287 }	244 }

288 }	245 }

289	246

290 return would_block;	247 return would_block;

291 }	248 }

292	249

293 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(	250 bool SiteIsolationStatsGatherer::SniffForJS(base::StringPiece data) {

294 const std::string& mime_type) {

295 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {

296 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;

297 }

298

299 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {

300 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;

301 }

302

303 if (LowerCaseEqualsASCII(mime_type, kAppJson) ||

304 LowerCaseEqualsASCII(mime_type, kTextJson) ||

305 LowerCaseEqualsASCII(mime_type, kTextXjson)) {

306 return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON;

307 }

308

309 if (LowerCaseEqualsASCII(mime_type, kTextXml) ||

310 LowerCaseEqualsASCII(mime_type, xAppRssXml) ||

311 LowerCaseEqualsASCII(mime_type, kAppXml)) {

312 return CROSS_SITE_DOCUMENT_MIME_TYPE_XML;

313 }

314

315 return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS;

316 }

317

318 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {

319 // We exclude ftp:// from here. FTP doesn't provide a Content-Type

320 // header which our policy depends on, so we cannot protect any

321 // document from FTP servers.

322 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);

323 }

324

325 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,

326 const GURL& response_url) {

327 if (!frame_origin.is_valid() || !response_url.is_valid())

328 return false;

329

330 if (frame_origin.scheme() != response_url.scheme())

331 return false;

332

333 // SameDomainOrHost() extracts the effective domains (public suffix plus one)

334 // from the two URLs and compare them.

335 return net::registry_controlled_domains::SameDomainOrHost(

336 frame_origin, response_url,

337 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);

338 }

339

340 // We don't use Webkit's existing CORS policy implementation since

341 // their policy works in terms of origins, not sites. For example,

342 // when frame is sub.a.com and it is not allowed to access a document

343 // with sub1.a.com. But under Site Isolation, it's allowed.

344 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(

345 const GURL& frame_origin,

346 const GURL& website_origin,

347 const std::string& access_control_origin) {

348 // Many websites are sending back "\"*\"" instead of "*". This is

349 // non-standard practice, and not supported by Chrome. Refer to

350 // CrossOriginAccessControl::passesAccessControlCheck().

351

352 // TODO(dsjang): * is not allowed for the response from a request

353 // with cookies. This allows for more than what the renderer will

354 // eventually be able to receive, so we won't see illegal cross-site

355 // documents allowed by this. We have to find a way to see if this

356 // response is from a cookie-tagged request or not in the future.

357 if (access_control_origin == "*")

358 return true;

359

360 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for

361 // "*", but many websites are using just a domain for access_control_origin,

362 // and this is blocked by Webkit's CORS logic here :

363 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set

364 // is_valid() to false when it is created from a URL containing * in the

365 // domain part.

366

367 GURL cors_origin(access_control_origin);

368 return IsSameSite(frame_origin, cors_origin);

369 }

370

371 // This function is a slight modification of |net::SniffForHTML|.

372 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {

373 // The content sniffer used by Chrome and Firefox are using "<!--"

374 // as one of the HTML signatures, but it also appears in valid

375 // JavaScript, considered as well-formed JS by the browser. Since

376 // we do not want to block any JS, we exclude it from our HTML

377 // signatures. This can weaken our document block policy, but we can

378 // break less websites.

379 // TODO(dsjang): parameterize |net::SniffForHTML| with an option

380 // that decides whether to include <!-- or not, so that we can

381 // remove this function.

382 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser

383 // process, we should do single-thread checking here for the static

384 // initializer.

385 static const StringPiece kHtmlSignatures[] = {

386 StringPiece("<!DOCTYPE html"), // HTML5 spec

387 StringPiece("<script"), // HTML5 spec, Mozilla

388 StringPiece("<html"), // HTML5 spec, Mozilla

389 StringPiece("<head"), // HTML5 spec, Mozilla

390 StringPiece("<iframe"), // Mozilla

391 StringPiece("<h1"), // Mozilla

392 StringPiece("<div"), // Mozilla

393 StringPiece("<font"), // Mozilla

394 StringPiece("<table"), // Mozilla

395 StringPiece("<a"), // Mozilla

396 StringPiece("<style"), // Mozilla

397 StringPiece("<title"), // Mozilla

398 StringPiece("<b"), // Mozilla

399 StringPiece("<body"), // Mozilla

400 StringPiece("<br"), // Mozilla

401 StringPiece("<p"), // Mozilla

402 StringPiece("<?xml") // Mozilla

403 };

404

405 while (data.length() > 0) {

406 if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures)))

407 return true;

408

409 // If we cannot find "<!--", we fail sniffing this as HTML.

410 static const StringPiece kCommentBegins[] = {StringPiece("<!--")};

411 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))

412 break;

413

414 // Search for --> and do SniffForHTML after that. If we can find the

415 // comment's end, we start HTML sniffing from there again.

416 static const char kEndComment[] = "-->";

417 size_t offset = data.find(kEndComment);

418 if (offset == base::StringPiece::npos)

419 break;

420

421 // Proceed to the index next to the ending comment (-->).

422 data.remove_prefix(offset + strlen(kEndComment));

423 }

424

425 return false;

426 }

427

428 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {

429 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for

430 // this signature. However, XML is case-sensitive. Don't we have to

431 // be more lenient only to block documents starting with the exact

432 // string <?xml rather than <?XML ?

433 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser

434 // process, we should do single-thread checking here for the static

435 // initializer.

436 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};

437 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));

438 }

439

440 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {

441 // TODO(dsjang): We have to come up with a better way to sniff

442 // JSON. However, even RE cannot help us that much due to the fact

443 // that we don't do full parsing. This DFA starts with state 0, and

444 // finds {, "/' and : in that order. We're avoiding adding a

445 // dependency on a regular expression library.

446 enum {

447 kStartState,

448 kLeftBraceState,

449 kLeftQuoteState,

450 kColonState,

451 kTerminalState,

452 } state = kStartState;

453

454 size_t length = data.length();

455 for (size_t i = 0; i < length && state < kColonState; ++i) {

456 const char c = data[i];

457 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')

458 continue;

459

460 switch (state) {

461 case kStartState:

462 if (c == '{')

463 state = kLeftBraceState;

464 else

465 state = kTerminalState;

466 break;

467 case kLeftBraceState:

468 if (c == '\"' || c == '\'')

469 state = kLeftQuoteState;

470 else

471 state = kTerminalState;

472 break;

473 case kLeftQuoteState:

474 if (c == ':')

475 state = kColonState;

476 break;

477 case kColonState:

478 case kTerminalState:

479 NOTREACHED();

480 break;

481 }

482 }

483 return state == kColonState;

484 }

485

486 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {

487 // The purpose of this function is to try to see if there's any possibility	251 // The purpose of this function is to try to see if there's any possibility

488 // that this data can be JavaScript (superset of JS). Search for "var " for JS	252 // that this data can be JavaScript (superset of JS). Search for "var " for JS

489 // detection. This is a real hack and should only be used for stats gathering.	253 // detection. This is a real hack and should only be used for stats gathering.

490 return data.find("var ") != base::StringPiece::npos;	254 return data.find("var ") != base::StringPiece::npos;

491 }	255 }

492	256

493 } // namespace content	257 } // namespace content

OLD	NEW