OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 634 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
645 return pos < end; | 645 return pos < end; |
646 } | 646 } |
647 | 647 |
648 // Byte order marks | 648 // Byte order marks |
649 static const MagicNumber kByteOrderMark[] = { | 649 static const MagicNumber kByteOrderMark[] = { |
650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE | 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE | 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 | 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
653 }; | 653 }; |
654 | 654 |
655 // Whether a given byte looks like it might be part of binary content. | 655 // The bytes which are considered to be "binary" are all < 0x20. Encode them one |
656 // Source: HTML5 spec | 656 // bit per byte, with 1 for a "binary" byte, and 0 for a "text" byte. The |
657 static char kByteLooksBinary[] = { | 657 // least-significant bit represents byte 0x00, the most-significant bit |
658 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F | 658 // represents byte 0x1F. |
659 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F | 659 // 0b11110111111111111100100111111111 |
660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F | 660 static const uint32 kBinaryBits = 0xf7ffc9ff; |
asanka
2015/04/29 23:17:44
Let's try to make this a bit more readable.
kByte
Adam Rice
2015/05/07 16:52:50
Done.
| |
661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F | |
662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F | |
663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F | |
664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F | |
665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F | |
666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F | |
667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F | |
668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF | |
669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF | |
670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF | |
671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF | |
672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF | |
673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF | |
674 }; | |
675 | 661 |
676 // Returns true and sets result to "application/octet-stream" if the content | 662 // Returns true and sets result to "application/octet-stream" if the content |
677 // appears to be binary data. Otherwise, returns false and sets "text/plain". | 663 // appears to be binary data. Otherwise, returns false and sets "text/plain". |
678 // Clears have_enough_content if more data could possibly change the result. | 664 // Clears have_enough_content if more data could possibly change the result. |
679 static bool SniffBinary(const char* content, | 665 static bool SniffBinary(const char* content, |
680 size_t size, | 666 size_t size, |
681 bool* have_enough_content, | 667 bool* have_enough_content, |
682 std::string* result) { | 668 std::string* result) { |
683 // There is no consensus about exactly how to sniff for binary content. | 669 // There is no consensus about exactly how to sniff for binary content. |
684 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 670 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |
(...skipping 13 matching lines...) Expand all Loading... | |
698 std::string unused; | 684 std::string unused; |
699 if (CheckForMagicNumbers(content, size, | 685 if (CheckForMagicNumbers(content, size, |
700 kByteOrderMark, arraysize(kByteOrderMark), | 686 kByteOrderMark, arraysize(kByteOrderMark), |
701 counter, &unused)) { | 687 counter, &unused)) { |
702 // If there is BOM, we think the buffer is not binary. | 688 // If there is BOM, we think the buffer is not binary. |
703 result->assign("text/plain"); | 689 result->assign("text/plain"); |
704 return false; | 690 return false; |
705 } | 691 } |
706 | 692 |
707 // Next we look to see if any of the bytes "look binary." | 693 // Next we look to see if any of the bytes "look binary." |
708 for (size_t i = 0; i < size; ++i) { | 694 if (ContainsControlCodes(content, size)) { |
709 // If we see a binary-looking byte, we think the content is binary. | 695 result->assign("application/octet-stream"); |
710 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { | 696 return true; |
711 result->assign("application/octet-stream"); | |
712 return true; | |
713 } | |
714 } | 697 } |
715 | 698 |
716 // No evidence either way. Default to non-binary and, if truncated, clear | 699 // No evidence either way. Default to non-binary and, if truncated, clear |
717 // have_enough_content because there could be a binary looking byte in the | 700 // have_enough_content because there could be a binary looking byte in the |
718 // truncated data. | 701 // truncated data. |
719 *have_enough_content &= is_truncated; | 702 *have_enough_content &= is_truncated; |
720 result->assign("text/plain"); | 703 result->assign("text/plain"); |
721 return false; | 704 return false; |
722 } | 705 } |
723 | 706 |
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
963 std::string* result) { | 946 std::string* result) { |
964 // First check the extra table. | 947 // First check the extra table. |
965 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, | 948 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
966 arraysize(kExtraMagicNumbers), NULL, result)) | 949 arraysize(kExtraMagicNumbers), NULL, result)) |
967 return true; | 950 return true; |
968 // Finally check the original table. | 951 // Finally check the original table. |
969 return CheckForMagicNumbers(content, size, kMagicNumbers, | 952 return CheckForMagicNumbers(content, size, kMagicNumbers, |
970 arraysize(kMagicNumbers), NULL, result); | 953 arraysize(kMagicNumbers), NULL, result); |
971 } | 954 } |
972 | 955 |
956 bool ContainsControlCodes(const char* content, size_t size) { | |
asanka
2015/04/29 23:17:44
Strictly speaking, this is a much fuzzier predicat
Adam Rice
2015/05/07 16:52:50
Done.
| |
957 for (size_t i = 0; i < size; ++i) { | |
958 uint8 byte = static_cast<uint8>(content[i]); | |
959 if (byte < 0x20 && (kBinaryBits & (1 << byte))) | |
960 return true; | |
961 } | |
962 return false; | |
963 } | |
964 | |
973 } // namespace net | 965 } // namespace net |
OLD | NEW |