OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 634 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
645 return pos < end; | 645 return pos < end; |
646 } | 646 } |
647 | 647 |
648 // Byte order marks | 648 // Byte order marks |
649 static const MagicNumber kByteOrderMark[] = { | 649 static const MagicNumber kByteOrderMark[] = { |
650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE | 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE | 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 | 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
653 }; | 653 }; |
654 | 654 |
655 // Whether a given byte looks like it might be part of binary content. | |
656 // Source: HTML5 spec | |
657 static char kByteLooksBinary[] = { | |
658 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F | |
659 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F | |
660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F | |
661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F | |
662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F | |
663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F | |
664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F | |
665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F | |
666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F | |
667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F | |
668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF | |
669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF | |
670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF | |
671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF | |
672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF | |
673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF | |
674 }; | |
675 | |
676 // Returns true and sets result to "application/octet-stream" if the content | 655 // Returns true and sets result to "application/octet-stream" if the content |
677 // appears to be binary data. Otherwise, returns false and sets "text/plain". | 656 // appears to be binary data. Otherwise, returns false and sets "text/plain". |
678 // Clears have_enough_content if more data could possibly change the result. | 657 // Clears have_enough_content if more data could possibly change the result. |
679 static bool SniffBinary(const char* content, | 658 static bool SniffBinary(const char* content, |
680 size_t size, | 659 size_t size, |
681 bool* have_enough_content, | 660 bool* have_enough_content, |
682 std::string* result) { | 661 std::string* result) { |
683 // There is no consensus about exactly how to sniff for binary content. | 662 // There is no consensus about exactly how to sniff for binary content. |
684 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 663 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |
685 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. | 664 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. |
(...skipping 12 matching lines...) Expand all Loading... |
698 std::string unused; | 677 std::string unused; |
699 if (CheckForMagicNumbers(content, size, | 678 if (CheckForMagicNumbers(content, size, |
700 kByteOrderMark, arraysize(kByteOrderMark), | 679 kByteOrderMark, arraysize(kByteOrderMark), |
701 counter, &unused)) { | 680 counter, &unused)) { |
702 // If there is BOM, we think the buffer is not binary. | 681 // If there is BOM, we think the buffer is not binary. |
703 result->assign("text/plain"); | 682 result->assign("text/plain"); |
704 return false; | 683 return false; |
705 } | 684 } |
706 | 685 |
707 // Next we look to see if any of the bytes "look binary." | 686 // Next we look to see if any of the bytes "look binary." |
708 for (size_t i = 0; i < size; ++i) { | 687 if (LooksLikeBinary(content, size)) { |
709 // If we see a binary-looking byte, we think the content is binary. | 688 result->assign("application/octet-stream"); |
710 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { | 689 return true; |
711 result->assign("application/octet-stream"); | |
712 return true; | |
713 } | |
714 } | 690 } |
715 | 691 |
716 // No evidence either way. Default to non-binary and, if truncated, clear | 692 // No evidence either way. Default to non-binary and, if truncated, clear |
717 // have_enough_content because there could be a binary looking byte in the | 693 // have_enough_content because there could be a binary looking byte in the |
718 // truncated data. | 694 // truncated data. |
719 *have_enough_content &= is_truncated; | 695 *have_enough_content &= is_truncated; |
720 result->assign("text/plain"); | 696 result->assign("text/plain"); |
721 return false; | 697 return false; |
722 } | 698 } |
723 | 699 |
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
963 std::string* result) { | 939 std::string* result) { |
964 // First check the extra table. | 940 // First check the extra table. |
965 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, | 941 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
966 arraysize(kExtraMagicNumbers), NULL, result)) | 942 arraysize(kExtraMagicNumbers), NULL, result)) |
967 return true; | 943 return true; |
968 // Finally check the original table. | 944 // Finally check the original table. |
969 return CheckForMagicNumbers(content, size, kMagicNumbers, | 945 return CheckForMagicNumbers(content, size, kMagicNumbers, |
970 arraysize(kMagicNumbers), NULL, result); | 946 arraysize(kMagicNumbers), NULL, result); |
971 } | 947 } |
972 | 948 |
| 949 bool LooksLikeBinary(const char* content, size_t size) { |
| 950 // The definition of "binary bytes" is from the spec at |
| 951 // https://mimesniff.spec.whatwg.org/#binary-data-byte |
| 952 // |
| 953 // The bytes which are considered to be "binary" are all < 0x20. Encode them |
| 954 // one bit per byte value, with 1 for a "binary" byte and 0 for a "text" byte. The |
| 955 // least-significant bit represents byte 0x00, the most-significant bit |
| 956 // represents byte 0x1F. |
| 957 const uint32 kBinaryBits = |
| 958 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
| 959 for (size_t i = 0; i < size; ++i) { |
| 960 uint8 byte = static_cast<uint8>(content[i]); |
| 961 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
| 962 return true; |
| 963 } |
| 964 return false; |
| 965 } |
| 966 |
973 } // namespace net | 967 } // namespace net |
OLD | NEW |