Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 634 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 645 return pos < end; | 645 return pos < end; |
| 646 } | 646 } |
| 647 | 647 |
| 648 // Byte order marks | 648 // Byte order marks |
| 649 static const MagicNumber kByteOrderMark[] = { | 649 static const MagicNumber kByteOrderMark[] = { |
| 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE | 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
| 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE | 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
| 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 | 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
| 653 }; | 653 }; |
| 654 | 654 |
| 655 // Whether a given byte looks like it might be part of binary content. | 655 // The bytes which are considered to be "binary" are all < 0x20. Encode them one |
| 656 // Source: HTML5 spec | 656 // bit per byte, with 1 for a "binary" byte, and 0 for a "text" byte. The |
| 657 static char kByteLooksBinary[] = { | 657 // least-significant bit represents byte 0x00, the most-significant bit |
| 658 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F | 658 // represents byte 0x1F. |
| 659 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F | 659 // 0b11110111111111111100100111111111 |
| 660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F | 660 static const uint32 kBinaryBits = 0xf7ffc9ff; |
|
asanka
2015/04/29 23:17:44
Let's try to make this a bit more readable.
kByte
Adam Rice
2015/05/07 16:52:50
Done.
| |
| 661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F | |
| 662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F | |
| 663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F | |
| 664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F | |
| 665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F | |
| 666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F | |
| 667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F | |
| 668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF | |
| 669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF | |
| 670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF | |
| 671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF | |
| 672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF | |
| 673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF | |
| 674 }; | |
| 675 | 661 |
| 676 // Returns true and sets result to "application/octet-stream" if the content | 662 // Returns true and sets result to "application/octet-stream" if the content |
| 677 // appears to be binary data. Otherwise, returns false and sets "text/plain". | 663 // appears to be binary data. Otherwise, returns false and sets "text/plain". |
| 678 // Clears have_enough_content if more data could possibly change the result. | 664 // Clears have_enough_content if more data could possibly change the result. |
| 679 static bool SniffBinary(const char* content, | 665 static bool SniffBinary(const char* content, |
| 680 size_t size, | 666 size_t size, |
| 681 bool* have_enough_content, | 667 bool* have_enough_content, |
| 682 std::string* result) { | 668 std::string* result) { |
| 683 // There is no consensus about exactly how to sniff for binary content. | 669 // There is no consensus about exactly how to sniff for binary content. |
| 684 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 670 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 698 std::string unused; | 684 std::string unused; |
| 699 if (CheckForMagicNumbers(content, size, | 685 if (CheckForMagicNumbers(content, size, |
| 700 kByteOrderMark, arraysize(kByteOrderMark), | 686 kByteOrderMark, arraysize(kByteOrderMark), |
| 701 counter, &unused)) { | 687 counter, &unused)) { |
| 702 // If there is BOM, we think the buffer is not binary. | 688 // If there is BOM, we think the buffer is not binary. |
| 703 result->assign("text/plain"); | 689 result->assign("text/plain"); |
| 704 return false; | 690 return false; |
| 705 } | 691 } |
| 706 | 692 |
| 707 // Next we look to see if any of the bytes "look binary." | 693 // Next we look to see if any of the bytes "look binary." |
| 708 for (size_t i = 0; i < size; ++i) { | 694 if (ContainsControlCodes(content, size)) { |
| 709 // If we see a binary-looking byte, we think the content is binary. | 695 result->assign("application/octet-stream"); |
| 710 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { | 696 return true; |
| 711 result->assign("application/octet-stream"); | |
| 712 return true; | |
| 713 } | |
| 714 } | 697 } |
| 715 | 698 |
| 716 // No evidence either way. Default to non-binary and, if truncated, clear | 699 // No evidence either way. Default to non-binary and, if truncated, clear |
| 717 // have_enough_content because there could be a binary looking byte in the | 700 // have_enough_content because there could be a binary looking byte in the |
| 718 // truncated data. | 701 // truncated data. |
| 719 *have_enough_content &= is_truncated; | 702 *have_enough_content &= is_truncated; |
| 720 result->assign("text/plain"); | 703 result->assign("text/plain"); |
| 721 return false; | 704 return false; |
| 722 } | 705 } |
| 723 | 706 |
| (...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 963 std::string* result) { | 946 std::string* result) { |
| 964 // First check the extra table. | 947 // First check the extra table. |
| 965 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, | 948 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
| 966 arraysize(kExtraMagicNumbers), NULL, result)) | 949 arraysize(kExtraMagicNumbers), NULL, result)) |
| 967 return true; | 950 return true; |
| 968 // Finally check the original table. | 951 // Finally check the original table. |
| 969 return CheckForMagicNumbers(content, size, kMagicNumbers, | 952 return CheckForMagicNumbers(content, size, kMagicNumbers, |
| 970 arraysize(kMagicNumbers), NULL, result); | 953 arraysize(kMagicNumbers), NULL, result); |
| 971 } | 954 } |
| 972 | 955 |
| 956 bool ContainsControlCodes(const char* content, size_t size) { | |
|
asanka
2015/04/29 23:17:44
Strictly speaking, this is a much fuzzier predicat
Adam Rice
2015/05/07 16:52:50
Done.
| |
| 957 for (size_t i = 0; i < size; ++i) { | |
| 958 uint8 byte = static_cast<uint8>(content[i]); | |
| 959 if (byte < 0x20 && (kBinaryBits & (1 << byte))) | |
| 960 return true; | |
| 961 } | |
| 962 return false; | |
| 963 } | |
| 964 | |
| 973 } // namespace net | 965 } // namespace net |
| OLD | NEW |