Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(130)

Unified Diff: net/base/mime_sniffer.cc

Issue 1058003005: Mime sniffer: reduce table from 256 bytes to 4 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add unit tests for ContainsControlCodes() Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: net/base/mime_sniffer.cc
diff --git a/net/base/mime_sniffer.cc b/net/base/mime_sniffer.cc
index 0d1f1169907bd91d1f93404afeee54a7e7e197e9..337759cd6f788b416f53f48be7f23d72ae870d3d 100644
--- a/net/base/mime_sniffer.cc
+++ b/net/base/mime_sniffer.cc
@@ -652,26 +652,12 @@ static const MagicNumber kByteOrderMark[] = {
MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8
};
-// Whether a given byte looks like it might be part of binary content.
-// Source: HTML5 spec
-static char kByteLooksBinary[] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF
-};
+// Bitmap of the byte values below 0x20 that count as "binary": bit N is 1 when
+// byte value N is "binary" and 0 when it is "text", so the least-significant
+// bit describes byte 0x00 and the most-significant bit describes byte 0x1F.
+// Every byte in that range is "binary" except 0x09, 0x0A, 0x0C, 0x0D and 0x1B,
+// which gives 0b11110111111111111100100111111111 (0xf7ffc9ff). Bytes >= 0x20
+// are never "binary" and have no bit here.
+static const uint32 kBinaryBits =
+    ~(1u << 0x09 | 1u << 0x0A | 1u << 0x0C | 1u << 0x0D | 1u << 0x1B);
asanka 2015/04/29 23:17:44 Let's try to make this a bit more readable. kByte
Adam Rice 2015/05/07 16:52:50 Done.
// Returns true and sets result to "application/octet-stream" if the content
// appears to be binary data. Otherwise, returns false and sets "text/plain".
@@ -705,12 +691,9 @@ static bool SniffBinary(const char* content,
}
// Next we look to see if any of the bytes "look binary."
- for (size_t i = 0; i < size; ++i) {
- // If we a see a binary-looking byte, we think the content is binary.
- if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {
- result->assign("application/octet-stream");
- return true;
- }
+ if (ContainsControlCodes(content, size)) {
+ result->assign("application/octet-stream");
+ return true;
}
// No evidence either way. Default to non-binary and, if truncated, clear
@@ -970,4 +953,13 @@ bool SniffMimeTypeFromLocalData(const char* content,
arraysize(kMagicNumbers), NULL, result);
}
+// Returns true if any of the first |size| bytes of |content| is a byte that
+// kBinaryBits marks as "binary" (all such bytes are < 0x20). Returns false
+// otherwise, including when |size| is 0.
+bool ContainsControlCodes(const char* content, size_t size) {
asanka 2015/04/29 23:17:44 Strictly speaking, this is a much fuzzier predicat
Adam Rice 2015/05/07 16:52:50 Done.
+  for (size_t i = 0; i < size; ++i) {
+    // Go through an unsigned type so that, where char is signed, bytes >= 0x80
+    // are not seen as negative values.
+    uint8 byte = static_cast<uint8>(content[i]);
+    // Shift an unsigned 1: |byte| can be 0x1F, and shifting a signed 1 left by
+    // 31 would move it into the sign bit of a 32-bit int.
+    if (byte < 0x20 && (kBinaryBits & (1u << byte)))
+      return true;
+  }
+  return false;
+}
+
} // namespace net

Powered by Google App Engine
This is Rietveld 408576698