Index: net/base/mime_sniffer.cc |
diff --git a/net/base/mime_sniffer.cc b/net/base/mime_sniffer.cc |
index 0d1f1169907bd91d1f93404afeee54a7e7e197e9..337759cd6f788b416f53f48be7f23d72ae870d3d 100644 |
--- a/net/base/mime_sniffer.cc |
+++ b/net/base/mime_sniffer.cc |
@@ -652,26 +652,12 @@ static const MagicNumber kByteOrderMark[] = { |
MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
}; |
-// Whether a given byte looks like it might be part of binary content. |
-// Source: HTML5 spec |
-static char kByteLooksBinary[] = { |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
-}; |
+// Bytes considered "binary" are all < 0x20. They are encoded here one bit |
+// per byte value: a 1 bit marks a "binary" byte and a 0 bit marks a "text" |
+// byte. The least-significant bit represents byte 0x00 and the |
+// most-significant bit represents byte 0x1F. |
+// 0b11110111111111111100100111111111 |
+static const uint32 kBinaryBits = 0xf7ffc9ff; |
asanka
2015/04/29 23:17:44
Let's try to make this a bit more readable.
kByte
Adam Rice
2015/05/07 16:52:50
Done.
|
// Returns true and sets result to "application/octet-stream" if the content |
// appears to be binary data. Otherwise, returns false and sets "text/plain". |
@@ -705,12 +691,9 @@ static bool SniffBinary(const char* content, |
} |
// Next we look to see if any of the bytes "look binary." |
- for (size_t i = 0; i < size; ++i) { |
- // If we a see a binary-looking byte, we think the content is binary. |
- if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { |
- result->assign("application/octet-stream"); |
- return true; |
- } |
+ if (ContainsControlCodes(content, size)) { |
+ result->assign("application/octet-stream"); |
+ return true; |
} |
// No evidence either way. Default to non-binary and, if truncated, clear |
@@ -970,4 +953,13 @@ bool SniffMimeTypeFromLocalData(const char* content, |
arraysize(kMagicNumbers), NULL, result); |
} |
+// Returns true if any of the first |size| bytes of |content| is below 0x20 |
+// and has its corresponding bit set in kBinaryBits. |
+bool ContainsControlCodes(const char* content, size_t size) { |
asanka
2015/04/29 23:17:44
Strictly speaking, this is a much fuzzier predicat
Adam Rice
2015/05/07 16:52:50
Done.
|
+ for (size_t i = 0; i < size; ++i) { |
+ uint8 byte = static_cast<uint8>(content[i]); |
+ // |byte| can be as large as 0x1F here; an unsigned literal keeps the |
+ // shift from reaching the sign bit of a signed int. |
+ if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
+ return true; |
+ } |
+ return false; |
+} |
+ |
} // namespace net |