Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: net/base/mime_sniffer.cc

Issue 1058003005: Mime sniffer: reduce table from 256 bytes to 4 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Compile fix for MSVC 64-bit Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/mime_sniffer.h ('k') | net/base/mime_sniffer_perftest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Detecting mime types is a tricky business because we need to balance 5 // Detecting mime types is a tricky business because we need to balance
6 // compatibility concerns with security issues. Here is a survey of how other 6 // compatibility concerns with security issues. Here is a survey of how other
7 // browsers behave and then a description of how we intend to behave. 7 // browsers behave and then a description of how we intend to behave.
8 // 8 //
9 // HTML payload, no Content-Type header: 9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML 10 // * IE 7: Render as HTML
(...skipping 634 matching lines...) Expand 10 before | Expand all | Expand 10 after
645 return pos < end; 645 return pos < end;
646 } 646 }
647 647
648 // Byte order marks 648 // Byte order marks
649 static const MagicNumber kByteOrderMark[] = { 649 static const MagicNumber kByteOrderMark[] = {
650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE 650 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE
651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE 651 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE
652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 652 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8
653 }; 653 };
654 654
655 // Whether a given byte looks like it might be part of binary content.
656 // Source: HTML5 spec
657 static char kByteLooksBinary[] = {
658 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F
659 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F
660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F
662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F
663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F
664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F
665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F
666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F
667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F
668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF
669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF
670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF
671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF
672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF
673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF
674 };
675
676 // Returns true and sets result to "application/octet-stream" if the content 655 // Returns true and sets result to "application/octet-stream" if the content
677 // appears to be binary data. Otherwise, returns false and sets "text/plain". 656 // appears to be binary data. Otherwise, returns false and sets "text/plain".
678 // Clears have_enough_content if more data could possibly change the result. 657 // Clears have_enough_content if more data could possibly change the result.
679 static bool SniffBinary(const char* content, 658 static bool SniffBinary(const char* content,
680 size_t size, 659 size_t size,
681 bool* have_enough_content, 660 bool* have_enough_content,
682 std::string* result) { 661 std::string* result) {
683 // There is no consensus about exactly how to sniff for binary content. 662 // There is no consensus about exactly how to sniff for binary content.
684 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. 663 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.
685 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. 664 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.
(...skipping 12 matching lines...) Expand all
698 std::string unused; 677 std::string unused;
699 if (CheckForMagicNumbers(content, size, 678 if (CheckForMagicNumbers(content, size,
700 kByteOrderMark, arraysize(kByteOrderMark), 679 kByteOrderMark, arraysize(kByteOrderMark),
701 counter, &unused)) { 680 counter, &unused)) {
702 // If there is BOM, we think the buffer is not binary. 681 // If there is BOM, we think the buffer is not binary.
703 result->assign("text/plain"); 682 result->assign("text/plain");
704 return false; 683 return false;
705 } 684 }
706 685
707 // Next we look to see if any of the bytes "look binary." 686 // Next we look to see if any of the bytes "look binary."
708 for (size_t i = 0; i < size; ++i) { 687 if (LooksLikeBinary(content, size)) {
709 // If we see a binary-looking byte, we think the content is binary. 688 result->assign("application/octet-stream");
710 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { 689 return true;
711 result->assign("application/octet-stream");
712 return true;
713 }
714 } 690 }
715 691
716 // No evidence either way. Default to non-binary and, if truncated, clear 692 // No evidence either way. Default to non-binary and, if truncated, clear
717 // have_enough_content because there could be a binary looking byte in the 693 // have_enough_content because there could be a binary looking byte in the
718 // truncated data. 694 // truncated data.
719 *have_enough_content &= is_truncated; 695 *have_enough_content &= is_truncated;
720 result->assign("text/plain"); 696 result->assign("text/plain");
721 return false; 697 return false;
722 } 698 }
723 699
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after
963 std::string* result) { 939 std::string* result) {
964 // First check the extra table. 940 // First check the extra table.
965 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, 941 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers,
966 arraysize(kExtraMagicNumbers), NULL, result)) 942 arraysize(kExtraMagicNumbers), NULL, result))
967 return true; 943 return true;
968 // Finally check the original table. 944 // Finally check the original table.
969 return CheckForMagicNumbers(content, size, kMagicNumbers, 945 return CheckForMagicNumbers(content, size, kMagicNumbers,
970 arraysize(kMagicNumbers), NULL, result); 946 arraysize(kMagicNumbers), NULL, result);
971 } 947 }
972 948
949 bool LooksLikeBinary(const char* content, size_t size) {
950 // The definition of "binary bytes" is from the spec at
951 // https://mimesniff.spec.whatwg.org/#binary-data-byte
952 //
953 // All bytes considered "binary" are < 0x20, so the set fits in a 32-bit
954 // mask: bit N is 1 if byte value N is "binary" and 0 if it is "text". The
955 // least-significant bit represents byte 0x00, the most-significant bit
956 // represents byte 0x1F.
957 const uint32 kBinaryBits =
958 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b');
959 for (size_t i = 0; i < size; ++i) {
960 uint8 byte = static_cast<uint8>(content[i]);
961 if (byte < 0x20 && (kBinaryBits & (1u << byte)))
962 return true;
963 }
964 return false;
965 }
966
973 } // namespace net 967 } // namespace net
OLDNEW
« no previous file with comments | « net/base/mime_sniffer.h ('k') | net/base/mime_sniffer_perftest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698