Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Side by Side Diff: net/base/mime_sniffer.cc

Issue 6780035: Use lock-free lazy initialization for static histogram references (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/cookie_monster_unittest.cc ('k') | net/disk_cache/histogram_macros.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Detecting mime types is a tricky business because we need to balance 5 // Detecting mime types is a tricky business because we need to balance
6 // compatibility concerns with security issues. Here is a survey of how other 6 // compatibility concerns with security issues. Here is a survey of how other
7 // browsers behave and then a description of how we intend to behave. 7 // browsers behave and then a description of how we intend to behave.
8 // 8 //
9 // HTML payload, no Content-Type header: 9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML 10 // * IE 7: Render as HTML
(...skipping 191 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 MAGIC_HTML_TAG("table") // Mozilla 202 MAGIC_HTML_TAG("table") // Mozilla
203 MAGIC_HTML_TAG("a") // Mozilla 203 MAGIC_HTML_TAG("a") // Mozilla
204 MAGIC_HTML_TAG("style") // Mozilla 204 MAGIC_HTML_TAG("style") // Mozilla
205 MAGIC_HTML_TAG("title") // Mozilla 205 MAGIC_HTML_TAG("title") // Mozilla
206 MAGIC_HTML_TAG("b") // Mozilla 206 MAGIC_HTML_TAG("b") // Mozilla
207 MAGIC_HTML_TAG("body") // Mozilla 207 MAGIC_HTML_TAG("body") // Mozilla
208 MAGIC_HTML_TAG("br") 208 MAGIC_HTML_TAG("br")
209 MAGIC_HTML_TAG("p") // Mozilla 209 MAGIC_HTML_TAG("p") // Mozilla
210 }; 210 };
211 211
212 static scoped_refptr<base::Histogram> UMASnifferHistogramGet(const char* name, 212 static base::Histogram* UMASnifferHistogramGet(const char* name,
213 int array_size) { 213 int array_size) {
214 scoped_refptr<base::Histogram> counter = 214 base::Histogram* counter =
215 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size, 215 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size,
216 base::Histogram::kUmaTargetedHistogramFlag); 216 base::Histogram::kUmaTargetedHistogramFlag);
217 return counter; 217 return counter;
218 } 218 }
219 219
220 // Compare content header to a magic number where magic_entry can contain '.' 220 // Compare content header to a magic number where magic_entry can contain '.'
221 // for single character of anything, allowing some bytes to be skipped. 221 // for single character of anything, allowing some bytes to be skipped.
222 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) { 222 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {
223 while (len) { 223 while (len) {
224 if ((*magic_entry != '.') && (*magic_entry != *content)) 224 if ((*magic_entry != '.') && (*magic_entry != *content))
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 *have_enough_content &= TruncateSize(512, &size); 301 *have_enough_content &= TruncateSize(512, &size);
302 302
303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, 303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,
304 // but with some modifications to better match the HTML5 spec. 304 // but with some modifications to better match the HTML5 spec.
305 const char* const end = content + size; 305 const char* const end = content + size;
306 const char* pos; 306 const char* pos;
307 for (pos = content; pos < end; ++pos) { 307 for (pos = content; pos < end; ++pos) {
308 if (!IsAsciiWhitespace(*pos)) 308 if (!IsAsciiWhitespace(*pos))
309 break; 309 break;
310 } 310 }
311 scoped_refptr<base::Histogram> counter = 311 static base::Histogram* counter(NULL);
312 UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", 312 if (!counter)
313 arraysize(kSniffableTags)); 313 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",
314 arraysize(kSniffableTags));
314 // |pos| now points to first non-whitespace character (or at end). 315 // |pos| now points to first non-whitespace character (or at end).
315 return CheckForMagicNumbers(pos, end - pos, 316 return CheckForMagicNumbers(pos, end - pos,
316 kSniffableTags, arraysize(kSniffableTags), 317 kSniffableTags, arraysize(kSniffableTags),
317 counter.get(), result); 318 counter, result);
318 } 319 }
319 320
320 // Returns true and sets result if the content matches any of kMagicNumbers. 321 // Returns true and sets result if the content matches any of kMagicNumbers.
321 // Clears have_enough_content if more data could possibly change the result. 322 // Clears have_enough_content if more data could possibly change the result.
322 static bool SniffForMagicNumbers(const char* content, 323 static bool SniffForMagicNumbers(const char* content,
323 size_t size, 324 size_t size,
324 bool* have_enough_content, 325 bool* have_enough_content,
325 std::string* result) { 326 std::string* result) {
326 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); 327 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);
327 328
328 // Check our big table of Magic Numbers 329 // Check our big table of Magic Numbers
329 scoped_refptr<base::Histogram> counter = 330 static base::Histogram* counter(NULL);
330 UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", 331 if (!counter)
331 arraysize(kMagicNumbers)); 332 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",
333 arraysize(kMagicNumbers));
332 return CheckForMagicNumbers(content, size, 334 return CheckForMagicNumbers(content, size,
333 kMagicNumbers, arraysize(kMagicNumbers), 335 kMagicNumbers, arraysize(kMagicNumbers),
334 counter.get(), result); 336 counter, result);
335 } 337 }
336 338
337 // Byte order marks 339 // Byte order marks
338 static const MagicNumber kMagicXML[] = { 340 static const MagicNumber kMagicXML[] = {
339 // We want to be very conservative in interpreting text/xml content as 341 // We want to be very conservative in interpreting text/xml content as
340 // XHTML -- we just want to sniff enough to make unit tests pass. 342 // XHTML -- we just want to sniff enough to make unit tests pass.
341 // So we match explicitly on this, and don't match other ways of writing 343 // So we match explicitly on this, and don't match other ways of writing
342 // it in semantically-equivalent ways. 344 // it in semantically-equivalent ways.
343 MAGIC_STRING("application/xhtml+xml", 345 MAGIC_STRING("application/xhtml+xml",
344 "<html xmlns=\"http://www.w3.org/1999/xhtml\"") 346 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")
(...skipping 15 matching lines...) Expand all
360 std::string* result) { 362 std::string* result) {
361 // We allow at most 300 bytes of content before we expect the opening tag. 363 // We allow at most 300 bytes of content before we expect the opening tag.
362 *have_enough_content &= TruncateSize(300, &size); 364 *have_enough_content &= TruncateSize(300, &size);
363 const char* pos = content; 365 const char* pos = content;
364 const char* const end = content + size; 366 const char* const end = content + size;
365 367
366 // This loop iterates through tag-looking offsets in the file. 368 // This loop iterates through tag-looking offsets in the file.
367 // We want to skip XML processing instructions (of the form "<?xml ...") 369 // We want to skip XML processing instructions (of the form "<?xml ...")
368 // and stop at the first "plain" tag, then make a decision on the mime-type 370 // and stop at the first "plain" tag, then make a decision on the mime-type
369 // based on the name (or possibly attributes) of that tag. 371 // based on the name (or possibly attributes) of that tag.
370 scoped_refptr<base::Histogram> counter = 372 static base::Histogram* counter(NULL);
371 UMASnifferHistogramGet("mime_sniffer.kMagicXML2", 373 if (!counter)
372 arraysize(kMagicXML)); 374 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2",
375 arraysize(kMagicXML));
373 const int kMaxTagIterations = 5; 376 const int kMaxTagIterations = 5;
374 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { 377 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {
375 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); 378 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));
376 if (!pos) 379 if (!pos)
377 return false; 380 return false;
378 381
379 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { 382 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {
380 // Skip XML declarations. 383 // Skip XML declarations.
381 ++pos; 384 ++pos;
382 continue; 385 continue;
383 } else if (base::strncasecmp(pos, "<!DOCTYPE", 386 } else if (base::strncasecmp(pos, "<!DOCTYPE",
384 sizeof("<!DOCTYPE")-1) == 0) { 387 sizeof("<!DOCTYPE")-1) == 0) {
385 // Skip DOCTYPE declarations. 388 // Skip DOCTYPE declarations.
386 ++pos; 389 ++pos;
387 continue; 390 continue;
388 } 391 }
389 392
390 if (CheckForMagicNumbers(pos, end - pos, 393 if (CheckForMagicNumbers(pos, end - pos,
391 kMagicXML, arraysize(kMagicXML), 394 kMagicXML, arraysize(kMagicXML),
392 counter.get(), result)) 395 counter, result))
393 return true; 396 return true;
394 397
395 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult 398 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult
396 // to identify. 399 // to identify.
397 400
398 // If we get here, we've hit an initial tag that hasn't matched one of the 401 // If we get here, we've hit an initial tag that hasn't matched one of the
399 // above tests. Abort. 402 // above tests. Abort.
400 return true; 403 return true;
401 } 404 }
402 405
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
444 // There is no consensus about exactly how to sniff for binary content. 447 // There is no consensus about exactly how to sniff for binary content.
445 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. 448 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.
446 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. 449 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.
447 // Here, we side with FF, but with a smaller buffer. This size was chosen 450 // Here, we side with FF, but with a smaller buffer. This size was chosen
448 // because it is small enough to comfortably fit into a single packet (after 451 // because it is small enough to comfortably fit into a single packet (after
449 // allowing for headers) and yet large enough to account for binary formats 452 // allowing for headers) and yet large enough to account for binary formats
450 // that have a significant amount of ASCII at the beginning (crbug.com/15314). 453 // that have a significant amount of ASCII at the beginning (crbug.com/15314).
451 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); 454 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);
452 455
453 // First, we look for a BOM. 456 // First, we look for a BOM.
454 scoped_refptr<base::Histogram> counter = 457 static base::Histogram* counter(NULL);
455 UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", 458 if (!counter)
456 arraysize(kByteOrderMark)); 459 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",
460 arraysize(kByteOrderMark));
457 std::string unused; 461 std::string unused;
458 if (CheckForMagicNumbers(content, size, 462 if (CheckForMagicNumbers(content, size,
459 kByteOrderMark, arraysize(kByteOrderMark), 463 kByteOrderMark, arraysize(kByteOrderMark),
460 counter.get(), &unused)) { 464 counter, &unused)) {
461 // If there is BOM, we think the buffer is not binary. 465 // If there is BOM, we think the buffer is not binary.
462 result->assign("text/plain"); 466 result->assign("text/plain");
463 return false; 467 return false;
464 } 468 }
465 469
466 // Next we look to see if any of the bytes "look binary." 470 // Next we look to see if any of the bytes "look binary."
467 for (size_t i = 0; i < size; ++i) { 471 for (size_t i = 0; i < size; ++i) {
468 // If we see a binary-looking byte, we think the content is binary. 472 // If we see a binary-looking byte, we think the content is binary.
469 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { 473 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {
470 result->assign("application/octet-stream"); 474 result->assign("application/octet-stream");
(...skipping 15 matching lines...) Expand all
486 static const char* kUnknownMimeTypes[] = { 490 static const char* kUnknownMimeTypes[] = {
487 // Empty mime types are as unknown as they get. 491 // Empty mime types are as unknown as they get.
488 "", 492 "",
489 // The unknown/unknown type is popular and uninformative 493 // The unknown/unknown type is popular and uninformative
490 "unknown/unknown", 494 "unknown/unknown",
491 // The second most popular unknown mime type is application/unknown 495 // The second most popular unknown mime type is application/unknown
492 "application/unknown", 496 "application/unknown",
493 // Firefox rejects a mime type if it is exactly */* 497 // Firefox rejects a mime type if it is exactly */*
494 "*/*", 498 "*/*",
495 }; 499 };
496 scoped_refptr<base::Histogram> counter = 500 static base::Histogram* counter(NULL);
497 UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", 501 if (!counter)
498 arraysize(kUnknownMimeTypes) + 1); 502 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",
503 arraysize(kUnknownMimeTypes) + 1);
499 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { 504 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {
500 if (mime_type == kUnknownMimeTypes[i]) { 505 if (mime_type == kUnknownMimeTypes[i]) {
501 counter->Add(i); 506 counter->Add(i);
502 return true; 507 return true;
503 } 508 }
504 } 509 }
505 if (mime_type.find('/') == std::string::npos) { 510 if (mime_type.find('/') == std::string::npos) {
506 // Firefox rejects a mime type if it does not contain a slash 511 // Firefox rejects a mime type if it does not contain a slash
507 counter->Add(arraysize(kUnknownMimeTypes)); 512 counter->Add(arraysize(kUnknownMimeTypes));
508 return true; 513 return true;
509 } 514 }
510 return false; 515 return false;
511 } 516 }
512 517
513 // Returns true and sets result if the content appears to be a crx (chrome 518 // Returns true and sets result if the content appears to be a crx (chrome
514 // extension) file. 519 // extension) file.
515 // Clears have_enough_content if more data could possibly change the result. 520 // Clears have_enough_content if more data could possibly change the result.
516 static bool SniffCRX(const char* content, 521 static bool SniffCRX(const char* content,
517 size_t size, 522 size_t size,
518 const GURL& url, 523 const GURL& url,
519 const std::string& type_hint, 524 const std::string& type_hint,
520 bool* have_enough_content, 525 bool* have_enough_content,
521 std::string* result) { 526 std::string* result) {
522 scoped_refptr<base::Histogram> counter = 527 static base::Histogram* counter(NULL);
523 UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); 528 if (!counter)
529 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);
524 530
525 // Technically, the crx magic number is just Cr24, but the bytes after that 531 // Technically, the crx magic number is just Cr24, but the bytes after that
526 // are a version number which changes infrequently. Including it in the 532 // are a version number which changes infrequently. Including it in the
527 // sniffing gives us less room for error. If the version number ever changes, 533 // sniffing gives us less room for error. If the version number ever changes,
528 // we can just add an entry to this list. 534 // we can just add an entry to this list.
529 // 535 //
530 // TODO(aa): If we ever have another magic number, we'll want to pass a 536 // TODO(aa): If we ever have another magic number, we'll want to pass a
531 // histogram into CheckForMagicNumbers(), below, to see which one matched. 537 // histogram into CheckForMagicNumbers(), below, to see which one matched.
532 static const struct MagicNumber kCRXMagicNumbers[] = { 538 static const struct MagicNumber kCRXMagicNumbers[] = {
533 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00") 539 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")
(...skipping 16 matching lines...) Expand all
550 NULL, result)) { 556 NULL, result)) {
551 counter->Add(2); 557 counter->Add(2);
552 } else { 558 } else {
553 return false; 559 return false;
554 } 560 }
555 561
556 return true; 562 return true;
557 } 563 }
558 564
559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { 565 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {
560 scoped_refptr<base::Histogram> should_sniff_counter = 566 static base::Histogram* should_sniff_counter(NULL);
561 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); 567 if (!should_sniff_counter)
568 should_sniff_counter =
569 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);
562 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP 570 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP
563 bool sniffable_scheme = url.is_empty() || 571 bool sniffable_scheme = url.is_empty() ||
564 url.SchemeIs("http") || 572 url.SchemeIs("http") ||
565 url.SchemeIs("https") || 573 url.SchemeIs("https") ||
566 url.SchemeIs("ftp") || 574 url.SchemeIs("ftp") ||
567 url.SchemeIsFile(); 575 url.SchemeIsFile();
568 if (!sniffable_scheme) { 576 if (!sniffable_scheme) {
569 should_sniff_counter->Add(1); 577 should_sniff_counter->Add(1);
570 return false; 578 return false;
571 } 579 }
572 580
573 static const char* kSniffableTypes[] = { 581 static const char* kSniffableTypes[] = {
574 // Many web servers are misconfigured to send text/plain for many 582 // Many web servers are misconfigured to send text/plain for many
575 // different types of content. 583 // different types of content.
576 "text/plain", 584 "text/plain",
577 // We want to sniff application/octet-stream for 585 // We want to sniff application/octet-stream for
578 // application/x-chrome-extension, but nothing else. 586 // application/x-chrome-extension, but nothing else.
579 "application/octet-stream", 587 "application/octet-stream",
580 // XHTML and Atom/RSS feeds are often served as plain xml instead of 588 // XHTML and Atom/RSS feeds are often served as plain xml instead of
581 // their more specific mime types. 589 // their more specific mime types.
582 "text/xml", 590 "text/xml",
583 "application/xml", 591 "application/xml",
584 }; 592 };
585 scoped_refptr<base::Histogram> counter = 593 static base::Histogram* counter(NULL);
586 UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", 594 if (!counter)
587 arraysize(kSniffableTypes) + 1); 595 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",
596 arraysize(kSniffableTypes) + 1);
588 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { 597 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {
589 if (mime_type == kSniffableTypes[i]) { 598 if (mime_type == kSniffableTypes[i]) {
590 counter->Add(i); 599 counter->Add(i);
591 should_sniff_counter->Add(2); 600 should_sniff_counter->Add(2);
592 return true; 601 return true;
593 } 602 }
594 } 603 }
595 if (IsUnknownMimeType(mime_type)) { 604 if (IsUnknownMimeType(mime_type)) {
596 // The web server didn't specify a content type or specified a mime 605 // The web server didn't specify a content type or specified a mime
597 // type that we ignore. 606 // type that we ignore.
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
671 // Now we look in our large table of magic numbers to see if we can find 680 // Now we look in our large table of magic numbers to see if we can find
672 // anything that matches the content. 681 // anything that matches the content.
673 if (SniffForMagicNumbers(content, content_size, 682 if (SniffForMagicNumbers(content, content_size,
674 &have_enough_content, result)) 683 &have_enough_content, result))
675 return true; // We've matched a magic number. No more content needed. 684 return true; // We've matched a magic number. No more content needed.
676 685
677 return have_enough_content; 686 return have_enough_content;
678 } 687 }
679 688
680 } // namespace net 689 } // namespace net
OLDNEW
« no previous file with comments | « net/base/cookie_monster_unittest.cc ('k') | net/disk_cache/histogram_macros.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698