net/base/mime_sniffer.cc - Issue 6712040: Removed static variable for histograms to avoid race with...

Side by Side Diff: net/base/mime_sniffer.cc

Issue 6712040: Removed static variable for histograms to avoid race with... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « content/browser/renderer_host/buffered_resource_handler.cc ('k') | net/disk_cache/histogram_macros.h » ('j') | no next file with comments »

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Detecting mime types is a tricky business because we need to balance	5 // Detecting mime types is a tricky business because we need to balance

6 // compatibility concerns with security issues. Here is a survey of how other	6 // compatibility concerns with security issues. Here is a survey of how other

7 // browsers behave and then a description of how we intend to behave.	7 // browsers behave and then a description of how we intend to behave.

8 //	8 //

9 // HTML payload, no Content-Type header:	9 // HTML payload, no Content-Type header:

10 // * IE 7: Render as HTML	10 // * IE 7: Render as HTML

(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
301 *have_enough_content &= TruncateSize(512, &size);	301 *have_enough_content &= TruncateSize(512, &size);

302	302

303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,	303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,

304 // but with some modifications to better match the HTML5 spec.	304 // but with some modifications to better match the HTML5 spec.

305 const char* const end = content + size;	305 const char* const end = content + size;

306 const char* pos;	306 const char* pos;

307 for (pos = content; pos < end; ++pos) {	307 for (pos = content; pos < end; ++pos) {

308 if (!IsAsciiWhitespace(*pos))	308 if (!IsAsciiWhitespace(*pos))

309 break;	309 break;

310 }	310 }

311 static scoped_refptr<base::Histogram> counter =	311 scoped_refptr<base::Histogram> counter =

312 UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",	312 UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",

313 arraysize(kSniffableTags));	313 arraysize(kSniffableTags));

314 // |pos| now points to first non-whitespace character (or at end).	314 // |pos| now points to first non-whitespace character (or at end).

315 return CheckForMagicNumbers(pos, end - pos,	315 return CheckForMagicNumbers(pos, end - pos,

316 kSniffableTags, arraysize(kSniffableTags),	316 kSniffableTags, arraysize(kSniffableTags),

317 counter.get(), result);	317 counter.get(), result);

318 }	318 }

319	319

320 // Returns true and sets result if the content matches any of kMagicNumbers.	320 // Returns true and sets result if the content matches any of kMagicNumbers.

321 // Clears have_enough_content if more data could possibly change the result.	321 // Clears have_enough_content if more data could possibly change the result.

322 static bool SniffForMagicNumbers(const char* content,	322 static bool SniffForMagicNumbers(const char* content,

323 size_t size,	323 size_t size,

324 bool* have_enough_content,	324 bool* have_enough_content,

325 std::string* result) {	325 std::string* result) {

326 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);	326 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);

327	327

328 // Check our big table of Magic Numbers	328 // Check our big table of Magic Numbers

329 static scoped_refptr<base::Histogram> counter =	329 scoped_refptr<base::Histogram> counter =

330 UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",	330 UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",

331 arraysize(kMagicNumbers));	331 arraysize(kMagicNumbers));

332 return CheckForMagicNumbers(content, size,	332 return CheckForMagicNumbers(content, size,

333 kMagicNumbers, arraysize(kMagicNumbers),	333 kMagicNumbers, arraysize(kMagicNumbers),

334 counter.get(), result);	334 counter.get(), result);

335 }	335 }

336	336

337 // Byte order marks	337 // Byte order marks

338 static const MagicNumber kMagicXML[] = {	338 static const MagicNumber kMagicXML[] = {

339 // We want to be very conservative in interpreting text/xml content as	339 // We want to be very conservative in interpreting text/xml content as

(...skipping 20 matching lines...) Expand all Loading...
360 std::string* result) {	360 std::string* result) {

361 // We allow at most 300 bytes of content before we expect the opening tag.	361 // We allow at most 300 bytes of content before we expect the opening tag.

362 *have_enough_content &= TruncateSize(300, &size);	362 *have_enough_content &= TruncateSize(300, &size);

363 const char* pos = content;	363 const char* pos = content;

364 const char* const end = content + size;	364 const char* const end = content + size;

365	365

366 // This loop iterates through tag-looking offsets in the file.	366 // This loop iterates through tag-looking offsets in the file.

367 // We want to skip XML processing instructions (of the form "<?xml ...")	367 // We want to skip XML processing instructions (of the form "<?xml ...")

368 // and stop at the first "plain" tag, then make a decision on the mime-type	368 // and stop at the first "plain" tag, then make a decision on the mime-type

369 // based on the name (or possibly attributes) of that tag.	369 // based on the name (or possibly attributes) of that tag.

370 static scoped_refptr<base::Histogram> counter =	370 scoped_refptr<base::Histogram> counter =

371 UMASnifferHistogramGet("mime_sniffer.kMagicXML2",	371 UMASnifferHistogramGet("mime_sniffer.kMagicXML2",

372 arraysize(kMagicXML));	372 arraysize(kMagicXML));

373 const int kMaxTagIterations = 5;	373 const int kMaxTagIterations = 5;

374 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {	374 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {

375 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));	375 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));

376 if (!pos)	376 if (!pos)

377 return false;	377 return false;

378	378

379 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {	379 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {

380 // Skip XML declarations.	380 // Skip XML declarations.

(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
444 // There is no consensus about exactly how to sniff for binary content.	444 // There is no consensus about exactly how to sniff for binary content.

445 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.	445 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.

446 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.	446 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.

447 // Here, we side with FF, but with a smaller buffer. This size was chosen	447 // Here, we side with FF, but with a smaller buffer. This size was chosen

448 // because it is small enough to comfortably fit into a single packet (after	448 // because it is small enough to comfortably fit into a single packet (after

449 // allowing for headers) and yet large enough to account for binary formats	449 // allowing for headers) and yet large enough to account for binary formats

450 // that have a significant amount of ASCII at the beginning (crbug.com/15314).	450 // that have a significant amount of ASCII at the beginning (crbug.com/15314).

451 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);	451 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);

452	452

453 // First, we look for a BOM.	453 // First, we look for a BOM.

454 static scoped_refptr<base::Histogram> counter =	454 scoped_refptr<base::Histogram> counter =

455 UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",	455 UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",

456 arraysize(kByteOrderMark));	456 arraysize(kByteOrderMark));

457 std::string unused;	457 std::string unused;

458 if (CheckForMagicNumbers(content, size,	458 if (CheckForMagicNumbers(content, size,

459 kByteOrderMark, arraysize(kByteOrderMark),	459 kByteOrderMark, arraysize(kByteOrderMark),

460 counter.get(), &unused)) {	460 counter.get(), &unused)) {

461 // If there is BOM, we think the buffer is not binary.	461 // If there is BOM, we think the buffer is not binary.

462 result->assign("text/plain");	462 result->assign("text/plain");

463 return false;	463 return false;

464 }	464 }

(...skipping 21 matching lines...) Expand all Loading...
486 static const char* kUnknownMimeTypes[] = {	486 static const char* kUnknownMimeTypes[] = {

487 // Empty mime types are as unknown as they get.	487 // Empty mime types are as unknown as they get.

488 "",	488 "",

489 // The unknown/unknown type is popular and uninformative	489 // The unknown/unknown type is popular and uninformative

490 "unknown/unknown",	490 "unknown/unknown",

491 // The second most popular unknown mime type is application/unknown	491 // The second most popular unknown mime type is application/unknown

492 "application/unknown",	492 "application/unknown",

493 // Firefox rejects a mime type if it is exactly /	493 // Firefox rejects a mime type if it is exactly /

494 "/",	494 "/",

495 };	495 };

496 static scoped_refptr<base::Histogram> counter =	496 scoped_refptr<base::Histogram> counter =

497 UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",	497 UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",

498 arraysize(kUnknownMimeTypes) + 1);	498 arraysize(kUnknownMimeTypes) + 1);

499 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {	499 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {

500 if (mime_type == kUnknownMimeTypes[i]) {	500 if (mime_type == kUnknownMimeTypes[i]) {

501 counter->Add(i);	501 counter->Add(i);

502 return true;	502 return true;

503 }	503 }

504 }	504 }

505 if (mime_type.find('/') == std::string::npos) {	505 if (mime_type.find('/') == std::string::npos) {

506 // Firefox rejects a mime type if it does not contain a slash	506 // Firefox rejects a mime type if it does not contain a slash

507 counter->Add(arraysize(kUnknownMimeTypes));	507 counter->Add(arraysize(kUnknownMimeTypes));

508 return true;	508 return true;

509 }	509 }

510 return false;	510 return false;

511 }	511 }

512	512

513 // Returns true and sets result if the content appears to be a crx (chrome	513 // Returns true and sets result if the content appears to be a crx (chrome

514 // extension) file.	514 // extension) file.

515 // Clears have_enough_content if more data could possibly change the result.	515 // Clears have_enough_content if more data could possibly change the result.

516 static bool SniffCRX(const char* content,	516 static bool SniffCRX(const char* content,

517 size_t size,	517 size_t size,

518 const GURL& url,	518 const GURL& url,

519 const std::string& type_hint,	519 const std::string& type_hint,

520 bool* have_enough_content,	520 bool* have_enough_content,

521 std::string* result) {	521 std::string* result) {

522 static scoped_refptr<base::Histogram> counter =	522 scoped_refptr<base::Histogram> counter =

523 UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);	523 UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);

524	524

525 // Technically, the crx magic number is just Cr24, but the bytes after that	525 // Technically, the crx magic number is just Cr24, but the bytes after that

526 // are a version number which changes infrequently. Including it in the	526 // are a version number which changes infrequently. Including it in the

527 // sniffing gives us less room for error. If the version number ever changes,	527 // sniffing gives us less room for error. If the version number ever changes,

528 // we can just add an entry to this list.	528 // we can just add an entry to this list.

529 //	529 //

530 // TODO(aa): If we ever have another magic number, we'll want to pass a	530 // TODO(aa): If we ever have another magic number, we'll want to pass a

531 // histogram into CheckForMagicNumbers(), below, to see which one matched.	531 // histogram into CheckForMagicNumbers(), below, to see which one matched.

532 static const struct MagicNumber kCRXMagicNumbers[] = {	532 static const struct MagicNumber kCRXMagicNumbers[] = {

(...skipping 17 matching lines...) Expand all Loading...
550 NULL, result)) {	550 NULL, result)) {

551 counter->Add(2);	551 counter->Add(2);

552 } else {	552 } else {

553 return false;	553 return false;

554 }	554 }

555	555

556 return true;	556 return true;

557 }	557 }

558	558

559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {	559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {

560 static scoped_refptr<base::Histogram> should_sniff_counter =	560 scoped_refptr<base::Histogram> should_sniff_counter =

561 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);	561 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);

562 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP	562 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP

563 bool sniffable_scheme = url.is_empty() ||	563 bool sniffable_scheme = url.is_empty() ||

564 url.SchemeIs("http") ||	564 url.SchemeIs("http") ||

565 url.SchemeIs("https") ||	565 url.SchemeIs("https") ||

566 url.SchemeIs("ftp") ||	566 url.SchemeIs("ftp") ||

567 url.SchemeIsFile();	567 url.SchemeIsFile();

568 if (!sniffable_scheme) {	568 if (!sniffable_scheme) {

569 should_sniff_counter->Add(1);	569 should_sniff_counter->Add(1);

570 return false;	570 return false;

571 }	571 }

572	572

573 static const char* kSniffableTypes[] = {	573 static const char* kSniffableTypes[] = {

574 // Many web servers are misconfigured to send text/plain for many	574 // Many web servers are misconfigured to send text/plain for many

575 // different types of content.	575 // different types of content.

576 "text/plain",	576 "text/plain",

577 // We want to sniff application/octet-stream for	577 // We want to sniff application/octet-stream for

578 // application/x-chrome-extension, but nothing else.	578 // application/x-chrome-extension, but nothing else.

579 "application/octet-stream",	579 "application/octet-stream",

580 // XHTML and Atom/RSS feeds are often served as plain xml instead of	580 // XHTML and Atom/RSS feeds are often served as plain xml instead of

581 // their more specific mime types.	581 // their more specific mime types.

582 "text/xml",	582 "text/xml",

583 "application/xml",	583 "application/xml",

584 };	584 };

585 static scoped_refptr<base::Histogram> counter =	585 scoped_refptr<base::Histogram> counter =

586 UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",	586 UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",

587 arraysize(kSniffableTypes) + 1);	587 arraysize(kSniffableTypes) + 1);

588 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {	588 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {

589 if (mime_type == kSniffableTypes[i]) {	589 if (mime_type == kSniffableTypes[i]) {

590 counter->Add(i);	590 counter->Add(i);

591 should_sniff_counter->Add(2);	591 should_sniff_counter->Add(2);

592 return true;	592 return true;

593 }	593 }

594 }	594 }

595 if (IsUnknownMimeType(mime_type)) {	595 if (IsUnknownMimeType(mime_type)) {

(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
671 // Now we look in our large table of magic numbers to see if we can find	671 // Now we look in our large table of magic numbers to see if we can find

672 // anything that matches the content.	672 // anything that matches the content.

673 if (SniffForMagicNumbers(content, content_size,	673 if (SniffForMagicNumbers(content, content_size,

674 &have_enough_content, result))	674 &have_enough_content, result))

675 return true; // We've matched a magic number. No more content needed.	675 return true; // We've matched a magic number. No more content needed.

676	676

677 return have_enough_content;	677 return have_enough_content;

678 }	678 }

679	679

680 } // namespace net	680 } // namespace net

OLD	NEW