| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 301 *have_enough_content &= TruncateSize(512, &size); | 301 *have_enough_content &= TruncateSize(512, &size); |
| 302 | 302 |
| 303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, | 303 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, |
| 304 // but with some modifications to better match the HTML5 spec. | 304 // but with some modifications to better match the HTML5 spec. |
| 305 const char* const end = content + size; | 305 const char* const end = content + size; |
| 306 const char* pos; | 306 const char* pos; |
| 307 for (pos = content; pos < end; ++pos) { | 307 for (pos = content; pos < end; ++pos) { |
| 308 if (!IsAsciiWhitespace(*pos)) | 308 if (!IsAsciiWhitespace(*pos)) |
| 309 break; | 309 break; |
| 310 } | 310 } |
| 311 static scoped_refptr<base::Histogram> counter = | 311 scoped_refptr<base::Histogram> counter = |
| 312 UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", | 312 UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", |
| 313 arraysize(kSniffableTags)); | 313 arraysize(kSniffableTags)); |
| 314 // |pos| now points to first non-whitespace character (or at end). | 314 // |pos| now points to first non-whitespace character (or at end). |
| 315 return CheckForMagicNumbers(pos, end - pos, | 315 return CheckForMagicNumbers(pos, end - pos, |
| 316 kSniffableTags, arraysize(kSniffableTags), | 316 kSniffableTags, arraysize(kSniffableTags), |
| 317 counter.get(), result); | 317 counter.get(), result); |
| 318 } | 318 } |
| 319 | 319 |
| 320 // Returns true and sets result if the content matches any of kMagicNumbers. | 320 // Returns true and sets result if the content matches any of kMagicNumbers. |
| 321 // Clears have_enough_content if more data could possibly change the result. | 321 // Clears have_enough_content if more data could possibly change the result. |
| 322 static bool SniffForMagicNumbers(const char* content, | 322 static bool SniffForMagicNumbers(const char* content, |
| 323 size_t size, | 323 size_t size, |
| 324 bool* have_enough_content, | 324 bool* have_enough_content, |
| 325 std::string* result) { | 325 std::string* result) { |
| 326 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); | 326 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); |
| 327 | 327 |
| 328 // Check our big table of Magic Numbers | 328 // Check our big table of Magic Numbers |
| 329 static scoped_refptr<base::Histogram> counter = | 329 scoped_refptr<base::Histogram> counter = |
| 330 UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", | 330 UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", |
| 331 arraysize(kMagicNumbers)); | 331 arraysize(kMagicNumbers)); |
| 332 return CheckForMagicNumbers(content, size, | 332 return CheckForMagicNumbers(content, size, |
| 333 kMagicNumbers, arraysize(kMagicNumbers), | 333 kMagicNumbers, arraysize(kMagicNumbers), |
| 334 counter.get(), result); | 334 counter.get(), result); |
| 335 } | 335 } |
| 336 | 336 |
| 337 // Magic numbers used to sniff XML-based content | 337 // Magic numbers used to sniff XML-based content |
| 338 static const MagicNumber kMagicXML[] = { | 338 static const MagicNumber kMagicXML[] = { |
| 339 // We want to be very conservative in interpreting text/xml content as | 339 // We want to be very conservative in interpreting text/xml content as |
| (...skipping 20 matching lines...) Expand all Loading... |
| 360 std::string* result) { | 360 std::string* result) { |
| 361 // We allow at most 300 bytes of content before we expect the opening tag. | 361 // We allow at most 300 bytes of content before we expect the opening tag. |
| 362 *have_enough_content &= TruncateSize(300, &size); | 362 *have_enough_content &= TruncateSize(300, &size); |
| 363 const char* pos = content; | 363 const char* pos = content; |
| 364 const char* const end = content + size; | 364 const char* const end = content + size; |
| 365 | 365 |
| 366 // This loop iterates through tag-looking offsets in the file. | 366 // This loop iterates through tag-looking offsets in the file. |
| 367 // We want to skip XML processing instructions (of the form "<?xml ...") | 367 // We want to skip XML processing instructions (of the form "<?xml ...") |
| 368 // and stop at the first "plain" tag, then make a decision on the mime-type | 368 // and stop at the first "plain" tag, then make a decision on the mime-type |
| 369 // based on the name (or possibly attributes) of that tag. | 369 // based on the name (or possibly attributes) of that tag. |
| 370 static scoped_refptr<base::Histogram> counter = | 370 scoped_refptr<base::Histogram> counter = |
| 371 UMASnifferHistogramGet("mime_sniffer.kMagicXML2", | 371 UMASnifferHistogramGet("mime_sniffer.kMagicXML2", |
| 372 arraysize(kMagicXML)); | 372 arraysize(kMagicXML)); |
| 373 const int kMaxTagIterations = 5; | 373 const int kMaxTagIterations = 5; |
| 374 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { | 374 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
| 375 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); | 375 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); |
| 376 if (!pos) | 376 if (!pos) |
| 377 return false; | 377 return false; |
| 378 | 378 |
| 379 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { | 379 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { |
| 380 // Skip XML declarations. | 380 // Skip XML declarations. |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 444 // There is no consensus about exactly how to sniff for binary content. | 444 // There is no consensus about exactly how to sniff for binary content. |
| 445 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 445 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |
| 446 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. | 446 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. |
| 447 // Here, we side with FF, but with a smaller buffer. This size was chosen | 447 // Here, we side with FF, but with a smaller buffer. This size was chosen |
| 448 // because it is small enough to comfortably fit into a single packet (after | 448 // because it is small enough to comfortably fit into a single packet (after |
| 449 // allowing for headers) and yet large enough to account for binary formats | 449 // allowing for headers) and yet large enough to account for binary formats |
| 450 // that have a significant amount of ASCII at the beginning (crbug.com/15314). | 450 // that have a significant amount of ASCII at the beginning (crbug.com/15314). |
| 451 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); | 451 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); |
| 452 | 452 |
| 453 // First, we look for a BOM. | 453 // First, we look for a BOM. |
| 454 static scoped_refptr<base::Histogram> counter = | 454 scoped_refptr<base::Histogram> counter = |
| 455 UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", | 455 UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", |
| 456 arraysize(kByteOrderMark)); | 456 arraysize(kByteOrderMark)); |
| 457 std::string unused; | 457 std::string unused; |
| 458 if (CheckForMagicNumbers(content, size, | 458 if (CheckForMagicNumbers(content, size, |
| 459 kByteOrderMark, arraysize(kByteOrderMark), | 459 kByteOrderMark, arraysize(kByteOrderMark), |
| 460 counter.get(), &unused)) { | 460 counter.get(), &unused)) { |
| 461 // If there is BOM, we think the buffer is not binary. | 461 // If there is BOM, we think the buffer is not binary. |
| 462 result->assign("text/plain"); | 462 result->assign("text/plain"); |
| 463 return false; | 463 return false; |
| 464 } | 464 } |
| (...skipping 21 matching lines...) Expand all Loading... |
| 486 static const char* kUnknownMimeTypes[] = { | 486 static const char* kUnknownMimeTypes[] = { |
| 487 // Empty mime types are as unknown as they get. | 487 // Empty mime types are as unknown as they get. |
| 488 "", | 488 "", |
| 489 // The unknown/unknown type is popular and uninformative | 489 // The unknown/unknown type is popular and uninformative |
| 490 "unknown/unknown", | 490 "unknown/unknown", |
| 491 // The second most popular unknown mime type is application/unknown | 491 // The second most popular unknown mime type is application/unknown |
| 492 "application/unknown", | 492 "application/unknown", |
| 493 // Firefox rejects a mime type if it is exactly */* | 493 // Firefox rejects a mime type if it is exactly */* |
| 494 "*/*", | 494 "*/*", |
| 495 }; | 495 }; |
| 496 static scoped_refptr<base::Histogram> counter = | 496 scoped_refptr<base::Histogram> counter = |
| 497 UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", | 497 UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", |
| 498 arraysize(kUnknownMimeTypes) + 1); | 498 arraysize(kUnknownMimeTypes) + 1); |
| 499 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { | 499 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { |
| 500 if (mime_type == kUnknownMimeTypes[i]) { | 500 if (mime_type == kUnknownMimeTypes[i]) { |
| 501 counter->Add(i); | 501 counter->Add(i); |
| 502 return true; | 502 return true; |
| 503 } | 503 } |
| 504 } | 504 } |
| 505 if (mime_type.find('/') == std::string::npos) { | 505 if (mime_type.find('/') == std::string::npos) { |
| 506 // Firefox rejects a mime type if it does not contain a slash | 506 // Firefox rejects a mime type if it does not contain a slash |
| 507 counter->Add(arraysize(kUnknownMimeTypes)); | 507 counter->Add(arraysize(kUnknownMimeTypes)); |
| 508 return true; | 508 return true; |
| 509 } | 509 } |
| 510 return false; | 510 return false; |
| 511 } | 511 } |
| 512 | 512 |
| 513 // Returns true and sets result if the content appears to be a crx (chrome | 513 // Returns true and sets result if the content appears to be a crx (chrome |
| 514 // extension) file. | 514 // extension) file. |
| 515 // Clears have_enough_content if more data could possibly change the result. | 515 // Clears have_enough_content if more data could possibly change the result. |
| 516 static bool SniffCRX(const char* content, | 516 static bool SniffCRX(const char* content, |
| 517 size_t size, | 517 size_t size, |
| 518 const GURL& url, | 518 const GURL& url, |
| 519 const std::string& type_hint, | 519 const std::string& type_hint, |
| 520 bool* have_enough_content, | 520 bool* have_enough_content, |
| 521 std::string* result) { | 521 std::string* result) { |
| 522 static scoped_refptr<base::Histogram> counter = | 522 scoped_refptr<base::Histogram> counter = |
| 523 UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); | 523 UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); |
| 524 | 524 |
| 525 // Technically, the crx magic number is just Cr24, but the bytes after that | 525 // Technically, the crx magic number is just Cr24, but the bytes after that |
| 526 // are a version number which changes infrequently. Including it in the | 526 // are a version number which changes infrequently. Including it in the |
| 527 // sniffing gives us less room for error. If the version number ever changes, | 527 // sniffing gives us less room for error. If the version number ever changes, |
| 528 // we can just add an entry to this list. | 528 // we can just add an entry to this list. |
| 529 // | 529 // |
| 530 // TODO(aa): If we ever have another magic number, we'll want to pass a | 530 // TODO(aa): If we ever have another magic number, we'll want to pass a |
| 531 // histogram into CheckForMagicNumbers(), below, to see which one matched. | 531 // histogram into CheckForMagicNumbers(), below, to see which one matched. |
| 532 static const struct MagicNumber kCRXMagicNumbers[] = { | 532 static const struct MagicNumber kCRXMagicNumbers[] = { |
| (...skipping 17 matching lines...) Expand all Loading... |
| 550 NULL, result)) { | 550 NULL, result)) { |
| 551 counter->Add(2); | 551 counter->Add(2); |
| 552 } else { | 552 } else { |
| 553 return false; | 553 return false; |
| 554 } | 554 } |
| 555 | 555 |
| 556 return true; | 556 return true; |
| 557 } | 557 } |
| 558 | 558 |
| 559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { | 559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |
| 560 static scoped_refptr<base::Histogram> should_sniff_counter = | 560 scoped_refptr<base::Histogram> should_sniff_counter = |
| 561 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); | 561 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); |
| 562 // We are willing to sniff the mime type for HTTP, HTTPS, FTP, and file URLs (or when the URL is empty) | 562 // We are willing to sniff the mime type for HTTP, HTTPS, FTP, and file URLs (or when the URL is empty) |
| 563 bool sniffable_scheme = url.is_empty() || | 563 bool sniffable_scheme = url.is_empty() || |
| 564 url.SchemeIs("http") || | 564 url.SchemeIs("http") || |
| 565 url.SchemeIs("https") || | 565 url.SchemeIs("https") || |
| 566 url.SchemeIs("ftp") || | 566 url.SchemeIs("ftp") || |
| 567 url.SchemeIsFile(); | 567 url.SchemeIsFile(); |
| 568 if (!sniffable_scheme) { | 568 if (!sniffable_scheme) { |
| 569 should_sniff_counter->Add(1); | 569 should_sniff_counter->Add(1); |
| 570 return false; | 570 return false; |
| 571 } | 571 } |
| 572 | 572 |
| 573 static const char* kSniffableTypes[] = { | 573 static const char* kSniffableTypes[] = { |
| 574 // Many web servers are misconfigured to send text/plain for many | 574 // Many web servers are misconfigured to send text/plain for many |
| 575 // different types of content. | 575 // different types of content. |
| 576 "text/plain", | 576 "text/plain", |
| 577 // We want to sniff application/octet-stream for | 577 // We want to sniff application/octet-stream for |
| 578 // application/x-chrome-extension, but nothing else. | 578 // application/x-chrome-extension, but nothing else. |
| 579 "application/octet-stream", | 579 "application/octet-stream", |
| 580 // XHTML and Atom/RSS feeds are often served as plain xml instead of | 580 // XHTML and Atom/RSS feeds are often served as plain xml instead of |
| 581 // their more specific mime types. | 581 // their more specific mime types. |
| 582 "text/xml", | 582 "text/xml", |
| 583 "application/xml", | 583 "application/xml", |
| 584 }; | 584 }; |
| 585 static scoped_refptr<base::Histogram> counter = | 585 scoped_refptr<base::Histogram> counter = |
| 586 UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", | 586 UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", |
| 587 arraysize(kSniffableTypes) + 1); | 587 arraysize(kSniffableTypes) + 1); |
| 588 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { | 588 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { |
| 589 if (mime_type == kSniffableTypes[i]) { | 589 if (mime_type == kSniffableTypes[i]) { |
| 590 counter->Add(i); | 590 counter->Add(i); |
| 591 should_sniff_counter->Add(2); | 591 should_sniff_counter->Add(2); |
| 592 return true; | 592 return true; |
| 593 } | 593 } |
| 594 } | 594 } |
| 595 if (IsUnknownMimeType(mime_type)) { | 595 if (IsUnknownMimeType(mime_type)) { |
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 671 // Now we look in our large table of magic numbers to see if we can find | 671 // Now we look in our large table of magic numbers to see if we can find |
| 672 // anything that matches the content. | 672 // anything that matches the content. |
| 673 if (SniffForMagicNumbers(content, content_size, | 673 if (SniffForMagicNumbers(content, content_size, |
| 674 &have_enough_content, result)) | 674 &have_enough_content, result)) |
| 675 return true; // We've matched a magic number. No more content needed. | 675 return true; // We've matched a magic number. No more content needed. |
| 676 | 676 |
| 677 return have_enough_content; | 677 return have_enough_content; |
| 678 } | 678 } |
| 679 | 679 |
| 680 } // namespace net | 680 } // namespace net |
| OLD | NEW |