Chromium Code Reviews| Index: net/base/mime_sniffer.cc |
| diff --git a/net/base/mime_sniffer.cc b/net/base/mime_sniffer.cc |
| index 0d1f1169907bd91d1f93404afeee54a7e7e197e9..337759cd6f788b416f53f48be7f23d72ae870d3d 100644 |
| --- a/net/base/mime_sniffer.cc |
| +++ b/net/base/mime_sniffer.cc |
| @@ -652,26 +652,12 @@ static const MagicNumber kByteOrderMark[] = { |
| MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
| }; |
| -// Whether a given byte looks like it might be part of binary content. |
| -// Source: HTML5 spec |
| -static char kByteLooksBinary[] = { |
| - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
| - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
| -}; |
| +// The bytes which are considered to be "binary" are all < 0x20. Encode them one |
| +// bit per byte value, with 1 for a "binary" byte and 0 for a "text" byte. The |
| +// least-significant bit represents byte 0x00, the most-significant bit |
| +// represents byte 0x1F. |
| +// 0b11110111111111111100100111111111 |
| +static const uint32 kBinaryBits = 0xf7ffc9ff; |
|
asanka
2015/04/29 23:17:44
Let's try to make this a bit more readable.
kByte
Adam Rice
2015/05/07 16:52:50
Done.
|
| // Returns true and sets result to "application/octet-stream" if the content |
| // appears to be binary data. Otherwise, returns false and sets "text/plain". |
| @@ -705,12 +691,9 @@ static bool SniffBinary(const char* content, |
| } |
| // Next we look to see if any of the bytes "look binary." |
| - for (size_t i = 0; i < size; ++i) { |
| - // If we see a binary-looking byte, we think the content is binary. |
| - if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { |
| - result->assign("application/octet-stream"); |
| - return true; |
| - } |
| + if (ContainsControlCodes(content, size)) { |
| + result->assign("application/octet-stream"); |
| + return true; |
| } |
| // No evidence either way. Default to non-binary and, if truncated, clear |
| @@ -970,4 +953,13 @@ bool SniffMimeTypeFromLocalData(const char* content, |
| arraysize(kMagicNumbers), NULL, result); |
| } |
| +bool ContainsControlCodes(const char* content, size_t size) { |
|
asanka
2015/04/29 23:17:44
Strictly speaking, this is a much fuzzier predicat
Adam Rice
2015/05/07 16:52:50
Done.
|
| + for (size_t i = 0; i < size; ++i) { |
| + uint8 byte = static_cast<uint8>(content[i]); |
| + if (byte < 0x20 && (kBinaryBits & (1 << byte))) |
| + return true; |
| + } |
| + return false; |
| +} |
| + |
| } // namespace net |