| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 634 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 645 return pos < end; | 645 return pos < end; |
| 646 } | 646 } |
| 647 | 647 |
| 648 // Byte order marks | 648 // Byte order marks |
| 649 static const MagicNumber kByteOrderMark[] = { | 649 static const MagicNumber kByteOrderMark[] = { |
| 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE | 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
| 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE | 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
| 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 | 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
| 653 }; | 653 }; |
| 654 | 654 |
| 655 // Whether a given byte looks like it might be part of binary content. | |
| 656 // Source: HTML5 spec | |
| 657 static char kByteLooksBinary[] = { | |
| 658 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F | |
| 659 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F | |
| 660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F | |
| 661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F | |
| 662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F | |
| 663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F | |
| 664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F | |
| 665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F | |
| 666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F | |
| 667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F | |
| 668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF | |
| 669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF | |
| 670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF | |
| 671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF | |
| 672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF | |
| 673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF | |
| 674 }; | |
| 675 | |
| 676 // Returns true and sets result to "application/octet-stream" if the content | 655 // Returns true and sets result to "application/octet-stream" if the content |
| 677 // appears to be binary data. Otherwise, returns false and sets "text/plain". | 656 // appears to be binary data. Otherwise, returns false and sets "text/plain". |
| 678 // Clears have_enough_content if more data could possibly change the result. | 657 // Clears have_enough_content if more data could possibly change the result. |
| 679 static bool SniffBinary(const char* content, | 658 static bool SniffBinary(const char* content, |
| 680 size_t size, | 659 size_t size, |
| 681 bool* have_enough_content, | 660 bool* have_enough_content, |
| 682 std::string* result) { | 661 std::string* result) { |
| 683 // There is no consensus about exactly how to sniff for binary content. | 662 // There is no consensus about exactly how to sniff for binary content. |
| 684 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 663 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |
| 685 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. | 664 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. |
| (...skipping 12 matching lines...) Expand all Loading... |
| 698 std::string unused; | 677 std::string unused; |
| 699 if (CheckForMagicNumbers(content, size, | 678 if (CheckForMagicNumbers(content, size, |
| 700 kByteOrderMark, arraysize(kByteOrderMark), | 679 kByteOrderMark, arraysize(kByteOrderMark), |
| 701 counter, &unused)) { | 680 counter, &unused)) { |
| 702 // If there is a BOM, we think the buffer is not binary. | 681 // If there is a BOM, we think the buffer is not binary. |
| 703 result->assign("text/plain"); | 682 result->assign("text/plain"); |
| 704 return false; | 683 return false; |
| 705 } | 684 } |
| 706 | 685 |
| 707 // Next we look to see if any of the bytes "look binary." | 686 // Next we look to see if any of the bytes "look binary." |
| 708 for (size_t i = 0; i < size; ++i) { | 687 if (LooksLikeBinary(content, size)) { |
| 709 // If we see a binary-looking byte, we think the content is binary. | 688 result->assign("application/octet-stream"); |
| 710 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { | 689 return true; |
| 711 result->assign("application/octet-stream"); | |
| 712 return true; | |
| 713 } | |
| 714 } | 690 } |
| 715 | 691 |
| 716 // No evidence either way. Default to non-binary and, if truncated, clear | 692 // No evidence either way. Default to non-binary and, if truncated, clear |
| 717 // have_enough_content because there could be a binary looking byte in the | 693 // have_enough_content because there could be a binary looking byte in the |
| 718 // truncated data. | 694 // truncated data. |
| 719 *have_enough_content &= is_truncated; | 695 *have_enough_content &= is_truncated; |
| 720 result->assign("text/plain"); | 696 result->assign("text/plain"); |
| 721 return false; | 697 return false; |
| 722 } | 698 } |
| 723 | 699 |
| (...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 963 std::string* result) { | 939 std::string* result) { |
| 964 // First check the extra table. | 940 // First check the extra table. |
| 965 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, | 941 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
| 966 arraysize(kExtraMagicNumbers), NULL, result)) | 942 arraysize(kExtraMagicNumbers), NULL, result)) |
| 967 return true; | 943 return true; |
| 968 // Finally check the original table. | 944 // Finally check the original table. |
| 969 return CheckForMagicNumbers(content, size, kMagicNumbers, | 945 return CheckForMagicNumbers(content, size, kMagicNumbers, |
| 970 arraysize(kMagicNumbers), NULL, result); | 946 arraysize(kMagicNumbers), NULL, result); |
| 971 } | 947 } |
| 972 | 948 |
| 949 bool LooksLikeBinary(const char* content, size_t size) { |
| 950 // The definition of "binary bytes" is from the spec at |
| 951 // https://mimesniff.spec.whatwg.org/#binary-data-byte |
| 952 // |
| 953 // The bytes which are considered to be "binary" are all < 0x20. Encode them |
| 954 // in a 32-bit mask, one bit per byte value: 1 marks a "binary" byte and 0 a |
| 955 // "text" byte. The least-significant bit represents byte 0x00, the |
| 956 // most-significant bit represents byte 0x1F. |
| 957 const uint32 kBinaryBits = |
| 958 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
| 959 for (size_t i = 0; i < size; ++i) { |
| 960 uint8 byte = static_cast<uint8>(content[i]); |
| 961 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
| 962 return true; |
| 963 } |
| 964 return false; |
| 965 } |
| 966 |
| 973 } // namespace net | 967 } // namespace net |
| OLD | NEW |