net/base/mime_sniffer.cc - Issue 6780035: Use lock-free lazy initialization for static histogram references

Side by Side Diff: net/base/mime_sniffer.cc

Issue 6780035: Use lock-free lazy initialization for static histogram references (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Detecting mime types is a tricky business because we need to balance	5 // Detecting mime types is a tricky business because we need to balance

6 // compatibility concerns with security issues. Here is a survey of how other	6 // compatibility concerns with security issues. Here is a survey of how other

7 // browsers behave and then a description of how we intend to behave.	7 // browsers behave and then a description of how we intend to behave.

8 //	8 //

9 // HTML payload, no Content-Type header:	9 // HTML payload, no Content-Type header:

10 // * IE 7: Render as HTML	10 // * IE 7: Render as HTML

(...skipping 191 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
202 MAGIC_HTML_TAG("table") // Mozilla	202 MAGIC_HTML_TAG("table") // Mozilla

203 MAGIC_HTML_TAG("a") // Mozilla	203 MAGIC_HTML_TAG("a") // Mozilla

204 MAGIC_HTML_TAG("style") // Mozilla	204 MAGIC_HTML_TAG("style") // Mozilla

205 MAGIC_HTML_TAG("title") // Mozilla	205 MAGIC_HTML_TAG("title") // Mozilla

206 MAGIC_HTML_TAG("b") // Mozilla	206 MAGIC_HTML_TAG("b") // Mozilla

207 MAGIC_HTML_TAG("body") // Mozilla	207 MAGIC_HTML_TAG("body") // Mozilla

208 MAGIC_HTML_TAG("br")	208 MAGIC_HTML_TAG("br")

209 MAGIC_HTML_TAG("p") // Mozilla	209 MAGIC_HTML_TAG("p") // Mozilla

210 };	210 };

211	211

212 static scoped_refptr<base::Histogram> UMASnifferHistogramGet(const char* name,	212 static base::Histogram* UMASnifferHistogramGet(const char* name,

213 int array_size) {	213 int array_size) {

214 scoped_refptr<base::Histogram> counter =	214 base::Histogram* counter =

215 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size,	215 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size,

216 base::Histogram::kUmaTargetedHistogramFlag);	216 base::Histogram::kUmaTargetedHistogramFlag);

217 return counter;	217 return counter;

218 }	218 }

219	219

220 // Compare content header to a magic number where magic_entry can contain '.'	220 // Compare content header to a magic number where magic_entry can contain '.'

221 // for single character of anything, allowing some bytes to be skipped.	221 // for single character of anything, allowing some bytes to be skipped.

222 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {	222 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {

223 while (len) {	223 while (len) {

224 if ((*magic_entry != '.') && (*magic_entry != *content))	224 if ((*magic_entry != '.') && (*magic_entry != *content))

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
301 *have_enough_content &= TruncateSize(512, &size);	301 *have_enough_content &= TruncateSize(512, &size);

302	302

303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,	303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,

304 // but with some modifications to better match the HTML5 spec.	304 // but with some modifications to better match the HTML5 spec.

305 const char* const end = content + size;	305 const char* const end = content + size;

306 const char* pos;	306 const char* pos;

307 for (pos = content; pos < end; ++pos) {	307 for (pos = content; pos < end; ++pos) {

308 if (!IsAsciiWhitespace(*pos))	308 if (!IsAsciiWhitespace(*pos))

309 break;	309 break;

310 }	310 }

311 scoped_refptr<base::Histogram> counter =	311 static base::Histogram* counter(NULL);

312 UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",	312 if (!counter)

313 arraysize(kSniffableTags));	313 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",

	314 arraysize(kSniffableTags));

314 // |pos| now points to first non-whitespace character (or at end).	315 // |pos| now points to first non-whitespace character (or at end).

315 return CheckForMagicNumbers(pos, end - pos,	316 return CheckForMagicNumbers(pos, end - pos,

316 kSniffableTags, arraysize(kSniffableTags),	317 kSniffableTags, arraysize(kSniffableTags),

317 counter.get(), result);	318 counter, result);

318 }	319 }

319	320

320 // Returns true and sets result if the content matches any of kMagicNumbers.	321 // Returns true and sets result if the content matches any of kMagicNumbers.

321 // Clears have_enough_content if more data could possibly change the result.	322 // Clears have_enough_content if more data could possibly change the result.

322 static bool SniffForMagicNumbers(const char* content,	323 static bool SniffForMagicNumbers(const char* content,

323 size_t size,	324 size_t size,

324 bool* have_enough_content,	325 bool* have_enough_content,

325 std::string* result) {	326 std::string* result) {

326 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);	327 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);

327	328

328 // Check our big table of Magic Numbers	329 // Check our big table of Magic Numbers

329 scoped_refptr<base::Histogram> counter =	330 static base::Histogram* counter(NULL);

330 UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",	331 if (!counter)

331 arraysize(kMagicNumbers));	332 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",

	333 arraysize(kMagicNumbers));

332 return CheckForMagicNumbers(content, size,	334 return CheckForMagicNumbers(content, size,

333 kMagicNumbers, arraysize(kMagicNumbers),	335 kMagicNumbers, arraysize(kMagicNumbers),

334 counter.get(), result);	336 counter, result);

335 }	337 }

336	338

337 // Byte order marks	339 // Byte order marks

338 static const MagicNumber kMagicXML[] = {	340 static const MagicNumber kMagicXML[] = {

339 // We want to be very conservative in interpreting text/xml content as	341 // We want to be very conservative in interpreting text/xml content as

340 // XHTML -- we just want to sniff enough to make unit tests pass.	342 // XHTML -- we just want to sniff enough to make unit tests pass.

341 // So we match explicitly on this, and don't match other ways of writing	343 // So we match explicitly on this, and don't match other ways of writing

342 // it in semantically-equivalent ways.	344 // it in semantically-equivalent ways.

343 MAGIC_STRING("application/xhtml+xml",	345 MAGIC_STRING("application/xhtml+xml",

344 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")	346 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")

(...skipping 15 matching lines...) Expand all Loading...
360 std::string* result) {	362 std::string* result) {

361 // We allow at most 300 bytes of content before we expect the opening tag.	363 // We allow at most 300 bytes of content before we expect the opening tag.

362 *have_enough_content &= TruncateSize(300, &size);	364 *have_enough_content &= TruncateSize(300, &size);

363 const char* pos = content;	365 const char* pos = content;

364 const char* const end = content + size;	366 const char* const end = content + size;

365	367

366 // This loop iterates through tag-looking offsets in the file.	368 // This loop iterates through tag-looking offsets in the file.

367 // We want to skip XML processing instructions (of the form "<?xml ...")	369 // We want to skip XML processing instructions (of the form "<?xml ...")

368 // and stop at the first "plain" tag, then make a decision on the mime-type	370 // and stop at the first "plain" tag, then make a decision on the mime-type

369 // based on the name (or possibly attributes) of that tag.	371 // based on the name (or possibly attributes) of that tag.

370 scoped_refptr<base::Histogram> counter =	372 static base::Histogram* counter(NULL);

371 UMASnifferHistogramGet("mime_sniffer.kMagicXML2",	373 if (!counter)

372 arraysize(kMagicXML));	374 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2",

	375 arraysize(kMagicXML));

373 const int kMaxTagIterations = 5;	376 const int kMaxTagIterations = 5;

374 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {	377 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {

375 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));	378 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));

376 if (!pos)	379 if (!pos)

377 return false;	380 return false;

378	381

379 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {	382 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {

380 // Skip XML declarations.	383 // Skip XML declarations.

381 ++pos;	384 ++pos;

382 continue;	385 continue;

383 } else if (base::strncasecmp(pos, "<!DOCTYPE",	386 } else if (base::strncasecmp(pos, "<!DOCTYPE",

384 sizeof("<!DOCTYPE")-1) == 0) {	387 sizeof("<!DOCTYPE")-1) == 0) {

385 // Skip DOCTYPE declarations.	388 // Skip DOCTYPE declarations.

386 ++pos;	389 ++pos;

387 continue;	390 continue;

388 }	391 }

389	392

390 if (CheckForMagicNumbers(pos, end - pos,	393 if (CheckForMagicNumbers(pos, end - pos,

391 kMagicXML, arraysize(kMagicXML),	394 kMagicXML, arraysize(kMagicXML),

392 counter.get(), result))	395 counter, result))

393 return true;	396 return true;

394	397

395 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult	398 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult

396 // to identify.	399 // to identify.

397	400

398 // If we get here, we've hit an initial tag that hasn't matched one of the	401 // If we get here, we've hit an initial tag that hasn't matched one of the

399 // above tests. Abort.	402 // above tests. Abort.

400 return true;	403 return true;

401 }	404 }

402	405

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
444 // There is no consensus about exactly how to sniff for binary content.	447 // There is no consensus about exactly how to sniff for binary content.

445 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.	448 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.

446 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.	449 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.

447 // Here, we side with FF, but with a smaller buffer. This size was chosen	450 // Here, we side with FF, but with a smaller buffer. This size was chosen

448 // because it is small enough to comfortably fit into a single packet (after	451 // because it is small enough to comfortably fit into a single packet (after

449 // allowing for headers) and yet large enough to account for binary formats	452 // allowing for headers) and yet large enough to account for binary formats

450 // that have a significant amount of ASCII at the beginning (crbug.com/15314).	453 // that have a significant amount of ASCII at the beginning (crbug.com/15314).

451 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);	454 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);

452	455

453 // First, we look for a BOM.	456 // First, we look for a BOM.

454 scoped_refptr<base::Histogram> counter =	457 static base::Histogram* counter(NULL);

455 UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",	458 if (!counter)

456 arraysize(kByteOrderMark));	459 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",

	460 arraysize(kByteOrderMark));

457 std::string unused;	461 std::string unused;

458 if (CheckForMagicNumbers(content, size,	462 if (CheckForMagicNumbers(content, size,

459 kByteOrderMark, arraysize(kByteOrderMark),	463 kByteOrderMark, arraysize(kByteOrderMark),

460 counter.get(), &unused)) {	464 counter, &unused)) {

461 // If there is BOM, we think the buffer is not binary.	465 // If there is BOM, we think the buffer is not binary.

462 result->assign("text/plain");	466 result->assign("text/plain");

463 return false;	467 return false;

464 }	468 }

465	469

466 // Next we look to see if any of the bytes "look binary."	470 // Next we look to see if any of the bytes "look binary."

467 for (size_t i = 0; i < size; ++i) {	471 for (size_t i = 0; i < size; ++i) {

468 // If we see a binary-looking byte, we think the content is binary.	472 // If we see a binary-looking byte, we think the content is binary.

469 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {	473 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {

470 result->assign("application/octet-stream");	474 result->assign("application/octet-stream");

(...skipping 15 matching lines...) Expand all Loading...
486 static const char* kUnknownMimeTypes[] = {	490 static const char* kUnknownMimeTypes[] = {

487 // Empty mime types are as unknown as they get.	491 // Empty mime types are as unknown as they get.

488 "",	492 "",

489 // The unknown/unknown type is popular and uninformative	493 // The unknown/unknown type is popular and uninformative

490 "unknown/unknown",	494 "unknown/unknown",

491 // The second most popular unknown mime type is application/unknown	495 // The second most popular unknown mime type is application/unknown

492 "application/unknown",	496 "application/unknown",

493 // Firefox rejects a mime type if it is exactly /	497 // Firefox rejects a mime type if it is exactly /

494 "/",	498 "/",

495 };	499 };

496 scoped_refptr<base::Histogram> counter =	500 static base::Histogram* counter(NULL);

497 UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",	501 if (!counter)

498 arraysize(kUnknownMimeTypes) + 1);	502 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",

	503 arraysize(kUnknownMimeTypes) + 1);

499 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {	504 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {

500 if (mime_type == kUnknownMimeTypes[i]) {	505 if (mime_type == kUnknownMimeTypes[i]) {

501 counter->Add(i);	506 counter->Add(i);

502 return true;	507 return true;

503 }	508 }

504 }	509 }

505 if (mime_type.find('/') == std::string::npos) {	510 if (mime_type.find('/') == std::string::npos) {

506 // Firefox rejects a mime type if it does not contain a slash	511 // Firefox rejects a mime type if it does not contain a slash

507 counter->Add(arraysize(kUnknownMimeTypes));	512 counter->Add(arraysize(kUnknownMimeTypes));

508 return true;	513 return true;

509 }	514 }

510 return false;	515 return false;

511 }	516 }

512	517

513 // Returns true and sets result if the content appears to be a crx (chrome	518 // Returns true and sets result if the content appears to be a crx (chrome

514 // extension) file.	519 // extension) file.

515 // Clears have_enough_content if more data could possibly change the result.	520 // Clears have_enough_content if more data could possibly change the result.

516 static bool SniffCRX(const char* content,	521 static bool SniffCRX(const char* content,

517 size_t size,	522 size_t size,

518 const GURL& url,	523 const GURL& url,

519 const std::string& type_hint,	524 const std::string& type_hint,

520 bool* have_enough_content,	525 bool* have_enough_content,

521 std::string* result) {	526 std::string* result) {

522 scoped_refptr<base::Histogram> counter =	527 static base::Histogram* counter(NULL);

523 UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);	528 if (!counter)

	529 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);

524	530

525 // Technically, the crx magic number is just Cr24, but the bytes after that	531 // Technically, the crx magic number is just Cr24, but the bytes after that

526 // are a version number which changes infrequently. Including it in the	532 // are a version number which changes infrequently. Including it in the

527 // sniffing gives us less room for error. If the version number ever changes,	533 // sniffing gives us less room for error. If the version number ever changes,

528 // we can just add an entry to this list.	534 // we can just add an entry to this list.

529 //	535 //

530 // TODO(aa): If we ever have another magic number, we'll want to pass a	536 // TODO(aa): If we ever have another magic number, we'll want to pass a

531 // histogram into CheckForMagicNumbers(), below, to see which one matched.	537 // histogram into CheckForMagicNumbers(), below, to see which one matched.

532 static const struct MagicNumber kCRXMagicNumbers[] = {	538 static const struct MagicNumber kCRXMagicNumbers[] = {

533 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")	539 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")

(...skipping 16 matching lines...) Expand all Loading...
550 NULL, result)) {	556 NULL, result)) {

551 counter->Add(2);	557 counter->Add(2);

552 } else {	558 } else {

553 return false;	559 return false;

554 }	560 }

555	561

556 return true;	562 return true;

557 }	563 }

558	564

559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {	565 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {

560 scoped_refptr<base::Histogram> should_sniff_counter =	566 static base::Histogram* should_sniff_counter(NULL);

561 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);	567 if (!should_sniff_counter)

	568 should_sniff_counter =

	569 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);

562 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP	570 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP

563 bool sniffable_scheme = url.is_empty() ||	571 bool sniffable_scheme = url.is_empty() ||

564 url.SchemeIs("http") ||	572 url.SchemeIs("http") ||

565 url.SchemeIs("https") ||	573 url.SchemeIs("https") ||

566 url.SchemeIs("ftp") ||	574 url.SchemeIs("ftp") ||

567 url.SchemeIsFile();	575 url.SchemeIsFile();

568 if (!sniffable_scheme) {	576 if (!sniffable_scheme) {

569 should_sniff_counter->Add(1);	577 should_sniff_counter->Add(1);

570 return false;	578 return false;

571 }	579 }

572	580

573 static const char* kSniffableTypes[] = {	581 static const char* kSniffableTypes[] = {

574 // Many web servers are misconfigured to send text/plain for many	582 // Many web servers are misconfigured to send text/plain for many

575 // different types of content.	583 // different types of content.

576 "text/plain",	584 "text/plain",

577 // We want to sniff application/octet-stream for	585 // We want to sniff application/octet-stream for

578 // application/x-chrome-extension, but nothing else.	586 // application/x-chrome-extension, but nothing else.

579 "application/octet-stream",	587 "application/octet-stream",

580 // XHTML and Atom/RSS feeds are often served as plain xml instead of	588 // XHTML and Atom/RSS feeds are often served as plain xml instead of

581 // their more specific mime types.	589 // their more specific mime types.

582 "text/xml",	590 "text/xml",

583 "application/xml",	591 "application/xml",

584 };	592 };

585 scoped_refptr<base::Histogram> counter =	593 static base::Histogram* counter(NULL);

586 UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",	594 if (!counter)

587 arraysize(kSniffableTypes) + 1);	595 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",

	596 arraysize(kSniffableTypes) + 1);

588 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {	597 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {

589 if (mime_type == kSniffableTypes[i]) {	598 if (mime_type == kSniffableTypes[i]) {

590 counter->Add(i);	599 counter->Add(i);

591 should_sniff_counter->Add(2);	600 should_sniff_counter->Add(2);

592 return true;	601 return true;

593 }	602 }

594 }	603 }

595 if (IsUnknownMimeType(mime_type)) {	604 if (IsUnknownMimeType(mime_type)) {

596 // The web server didn't specify a content type or specified a mime	605 // The web server didn't specify a content type or specified a mime

597 // type that we ignore.	606 // type that we ignore.

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
671 // Now we look in our large table of magic numbers to see if we can find	680 // Now we look in our large table of magic numbers to see if we can find

672 // anything that matches the content.	681 // anything that matches the content.

673 if (SniffForMagicNumbers(content, content_size,	682 if (SniffForMagicNumbers(content, content_size,

674 &have_enough_content, result))	683 &have_enough_content, result))

675 return true; // We've matched a magic number. No more content needed.	684 return true; // We've matched a magic number. No more content needed.

676	685

677 return have_enough_content;	686 return have_enough_content;

678 }	687 }

679	688

680 } // namespace net	689 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/cookie_monster_unittest.cc ('k') | net/disk_cache/histogram_macros.h » ('j') | no next file with comments »