Index: net/base/mime_sniffer.cc |
diff --git a/net/base/mime_sniffer.cc b/net/base/mime_sniffer.cc |
index ef2e27030bb5f642477362def29a683e5ad3a544..422e3f2d7901c578d286573ecfb9ee112ab22029 100644 |
--- a/net/base/mime_sniffer.cc |
+++ b/net/base/mime_sniffer.cc |
@@ -117,77 +117,78 @@ struct MagicNumber { |
const char* mask; // if set, must have same length as |magic| |
}; |
-#define MAGIC_NUMBER(mime_type, magic) \ |
- { (mime_type), (magic), sizeof(magic)-1, false, NULL }, |
+#define MAGIC_NUMBER(mime_type, magic) \ |
+ { (mime_type), (magic), sizeof(magic) - 1, false, NULL } \ |
+ , |
mmenke
2014/10/10 18:12:39
Hrm... That comma change is really weird.
|
template <int MagicSize, int MaskSize> |
class VerifySizes { |
COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal); |
+ |
public: |
enum { SIZES = MagicSize }; |
}; |
#define verified_sizeof(magic, mask) \ |
-VerifySizes<sizeof(magic), sizeof(mask)>::SIZES |
+ VerifySizes<sizeof(magic), sizeof(mask)>::SIZES |
-#define MAGIC_MASK(mime_type, magic, mask) \ |
- { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) }, |
+#define MAGIC_MASK(mime_type, magic, mask) \ |
+ { (mime_type), (magic), verified_sizeof(magic, mask) - 1, false, (mask) } \ |
+ , |
// Magic strings are case insensitive and must not include '\0' characters |
-#define MAGIC_STRING(mime_type, magic) \ |
- { (mime_type), (magic), sizeof(magic)-1, true, NULL }, |
+#define MAGIC_STRING(mime_type, magic) \ |
+ { (mime_type), (magic), sizeof(magic) - 1, true, NULL } \ |
+ , |
static const MagicNumber kMagicNumbers[] = { |
- // Source: HTML 5 specification |
- MAGIC_NUMBER("application/pdf", "%PDF-") |
- MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
- MAGIC_NUMBER("image/gif", "GIF87a") |
- MAGIC_NUMBER("image/gif", "GIF89a") |
- MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") |
- MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") |
- MAGIC_NUMBER("image/bmp", "BM") |
- // Source: Mozilla |
- MAGIC_NUMBER("text/plain", "#!") // Script |
- MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS |
- MAGIC_NUMBER("text/plain", "From") |
- MAGIC_NUMBER("text/plain", ">From") |
- // Chrome specific |
- MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") |
- MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") |
- MAGIC_NUMBER("video/x-ms-asf", |
- "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") |
- MAGIC_NUMBER("image/tiff", "I I") |
- MAGIC_NUMBER("image/tiff", "II*") |
- MAGIC_NUMBER("image/tiff", "MM\x00*") |
- MAGIC_NUMBER("audio/mpeg", "ID3") |
- MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") |
- MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3") |
- // TODO(abarth): we don't handle partial byte matches yet |
- // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") |
- // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") |
- // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") |
- MAGIC_NUMBER("application/zip", "PK\x03\x04") |
- MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") |
- MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") |
- MAGIC_NUMBER("application/octet-stream", "MZ") // EXE |
- // Sniffing for Flash: |
- // |
- // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
- // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") |
- // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
- // |
- // Including these magic number for Flash is a trade off. |
- // |
- // Pros: |
- // * Flash is an important and popular file format |
- // |
- // Cons: |
- // * These patterns are fairly weak |
- // * If we mistakenly decide something is Flash, we will execute it |
- // in the origin of an unsuspecting site. This could be a security |
- // vulnerability if the site allows users to upload content. |
- // |
- // On balance, we do not include these patterns. |
+ // Source: HTML 5 specification |
+ MAGIC_NUMBER("application/pdf", "%PDF-") |
+ MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
+ MAGIC_NUMBER("image/gif", "GIF87a") MAGIC_NUMBER("image/gif", "GIF89a") |
+ MAGIC_NUMBER("image/png", |
+ "\x89" |
+ "PNG\x0D\x0A\x1A\x0A") |
+ MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") MAGIC_NUMBER("image/bmp", "BM") |
+ // Source: Mozilla |
+ MAGIC_NUMBER("text/plain", "#!") // Script |
+ MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS |
+ MAGIC_NUMBER("text/plain", "From") MAGIC_NUMBER("text/plain", ">From") |
+ // Chrome specific |
+ MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") |
+ MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") MAGIC_NUMBER( |
+ "video/x-ms-asf", |
+ "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") |
+ MAGIC_NUMBER("image/tiff", "I I") MAGIC_NUMBER("image/tiff", "II*") |
+ MAGIC_NUMBER("image/tiff", "MM\x00*") MAGIC_NUMBER("audio/mpeg", "ID3") |
+ MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") |
+ MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3") |
+ // TODO(abarth): we don't handle partial byte matches yet |
+ // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") |
+ // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") |
+ // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") |
+ MAGIC_NUMBER("application/zip", "PK\x03\x04") |
+ MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") |
+ MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") |
+ MAGIC_NUMBER("application/octet-stream", "MZ") // EXE |
+ // Sniffing for Flash: |
+ // |
+ // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
+ // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") |
+ // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
+ // |
+ // Including these magic number for Flash is a trade off. |
+ // |
+ // Pros: |
+ // * Flash is an important and popular file format |
+ // |
+ // Cons: |
+ // * These patterns are fairly weak |
+ // * If we mistakenly decide something is Flash, we will execute it |
+ // in the origin of an unsuspecting site. This could be a security |
+ // vulnerability if the site allows users to upload content. |
+ // |
+ // On balance, we do not include these patterns. |
}; |
// The number of content bytes we need to use all our Microsoft Office magic |
@@ -195,9 +196,8 @@ static const MagicNumber kMagicNumbers[] = { |
static const size_t kBytesRequiredForOfficeMagic = 8; |
static const MagicNumber kOfficeMagicNumbers[] = { |
- MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1") |
- MAGIC_NUMBER("OOXML", "PK\x03\x04") |
-}; |
+ MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1") |
+ MAGIC_NUMBER("OOXML", "PK\x03\x04")}; |
mmenke
2014/10/10 18:12:39
Not putting the close brace on its own line seems [...]
|
enum OfficeDocType { |
DOC_TYPE_WORD, |
@@ -212,90 +212,90 @@ struct OfficeExtensionType { |
size_t extension_len; |
}; |
-#define OFFICE_EXTENSION(type, extension) \ |
- { (type), (extension), sizeof(extension) - 1 }, |
+#define OFFICE_EXTENSION(type, extension) \ |
+ { (type), (extension), sizeof(extension) - 1 } \ |
+ , |
static const OfficeExtensionType kOfficeExtensionTypes[] = { |
- OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc") |
- OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls") |
- OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt") |
- OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx") |
- OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx") |
- OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx") |
-}; |
+ OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc") |
+ OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls") |
+ OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt") |
+ OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx") |
+ OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx") |
+ OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")}; |
static const MagicNumber kExtraMagicNumbers[] = { |
- MAGIC_NUMBER("image/x-xbitmap", "#define") |
- MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00") |
- MAGIC_NUMBER("image/svg+xml", "<?xml_version=") |
- MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") |
- MAGIC_NUMBER("video/avi", "RIFF....AVI LIST") |
- MAGIC_NUMBER("audio/ogg", "OggS") |
- MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") |
- MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") |
- MAGIC_NUMBER("video/3gpp", "....ftyp3g") |
- MAGIC_NUMBER("video/3gpp", "....ftypavcl") |
- MAGIC_NUMBER("video/mp4", "....ftyp") |
- MAGIC_NUMBER("video/quicktime", "....moov") |
- MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
- MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
- MAGIC_NUMBER("video/x-flv", "FLV") |
- MAGIC_NUMBER("audio/x-flac", "fLaC") |
- |
- // RAW image types. |
- MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR") |
- MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR") |
- MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM") |
- MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian |
- MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian |
- MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian |
- MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ") |
- MAGIC_NUMBER("image/x-panasonic-raw", |
- "IIU\x00\x08\x00\x00\x00") // Panasonic .raw |
- MAGIC_NUMBER("image/x-panasonic-raw", |
- "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2 |
- MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw") |
- MAGIC_NUMBER("image/x-x3f", "FOVb") |
-}; |
+ MAGIC_NUMBER("image/x-xbitmap", "#define") |
+ MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00") |
+ MAGIC_NUMBER("image/svg+xml", "<?xml_version=") |
+ MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") |
+ MAGIC_NUMBER("video/avi", "RIFF....AVI LIST") |
+ MAGIC_NUMBER("audio/ogg", "OggS") |
+ MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") |
+ MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") |
+ MAGIC_NUMBER("video/3gpp", "....ftyp3g") |
+ MAGIC_NUMBER("video/3gpp", "....ftypavcl") |
+ MAGIC_NUMBER("video/mp4", "....ftyp") |
+ MAGIC_NUMBER("video/quicktime", "....moov") |
+ MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
+ MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
+ MAGIC_NUMBER("video/x-flv", "FLV") MAGIC_NUMBER("audio/x-flac", "fLaC") |
+ |
+ // RAW image types. |
+ MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR") |
+ MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR") |
+ MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM") |
+ MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian |
+ MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian |
+ MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian |
+ MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ") |
+ MAGIC_NUMBER("image/x-panasonic-raw", |
+ "IIU\x00\x08\x00\x00\x00") // Panasonic .raw |
+ MAGIC_NUMBER("image/x-panasonic-raw", |
+ "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2 |
+ MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw") |
+ MAGIC_NUMBER("image/x-x3f", "FOVb")}; |
// Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will |
// decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is |
// HTML, but we will not. |
-#define MAGIC_HTML_TAG(tag) \ |
- MAGIC_STRING("text/html", "<" tag) |
+#define MAGIC_HTML_TAG(tag) MAGIC_STRING("text/html", "<" tag) |
static const MagicNumber kSniffableTags[] = { |
- // XML processing directive. Although this is not an HTML mime type, we sniff |
- // for this in the HTML phase because text/xml is just as powerful as HTML and |
- // we want to leverage our white space skipping technology. |
- MAGIC_NUMBER("text/xml", "<?xml") // Mozilla |
- // DOCTYPEs |
- MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec |
- // Sniffable tags, ordered by how often they occur in sniffable documents. |
- MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla |
- MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla |
- MAGIC_HTML_TAG("!--") |
- MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla |
- MAGIC_HTML_TAG("iframe") // Mozilla |
- MAGIC_HTML_TAG("h1") // Mozilla |
- MAGIC_HTML_TAG("div") // Mozilla |
- MAGIC_HTML_TAG("font") // Mozilla |
- MAGIC_HTML_TAG("table") // Mozilla |
- MAGIC_HTML_TAG("a") // Mozilla |
- MAGIC_HTML_TAG("style") // Mozilla |
- MAGIC_HTML_TAG("title") // Mozilla |
- MAGIC_HTML_TAG("b") // Mozilla |
- MAGIC_HTML_TAG("body") // Mozilla |
- MAGIC_HTML_TAG("br") |
- MAGIC_HTML_TAG("p") // Mozilla |
+ // XML processing directive. Although this is not an HTML mime type, we |
+ // sniff |
+ // for this in the HTML phase because text/xml is just as powerful as HTML |
+ // and |
+ // we want to leverage our white space skipping technology. |
+ MAGIC_NUMBER("text/xml", "<?xml") // Mozilla |
+ // DOCTYPEs |
+ MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec |
+ // Sniffable tags, ordered by how often they occur in sniffable documents. |
+ MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla |
+ MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla |
+ MAGIC_HTML_TAG("!--") MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla |
+ MAGIC_HTML_TAG("iframe") // Mozilla |
+ MAGIC_HTML_TAG("h1") // Mozilla |
+ MAGIC_HTML_TAG("div") // Mozilla |
+ MAGIC_HTML_TAG("font") // Mozilla |
+ MAGIC_HTML_TAG("table") // Mozilla |
+ MAGIC_HTML_TAG("a") // Mozilla |
+ MAGIC_HTML_TAG("style") // Mozilla |
+ MAGIC_HTML_TAG("title") // Mozilla |
+ MAGIC_HTML_TAG("b") // Mozilla |
+ MAGIC_HTML_TAG("body") // Mozilla |
+ MAGIC_HTML_TAG("br") MAGIC_HTML_TAG("p") // Mozilla |
}; |
static base::HistogramBase* UMASnifferHistogramGet(const char* name, |
int array_size) { |
- base::HistogramBase* counter = |
- base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size, |
- base::HistogramBase::kUmaTargetedHistogramFlag); |
+ base::HistogramBase* counter = base::LinearHistogram::FactoryGet( |
+ name, |
+ 1, |
+ array_size - 1, |
+ array_size, |
+ base::HistogramBase::kUmaTargetedHistogramFlag); |
return counter; |
} |
@@ -368,13 +368,16 @@ static bool MatchMagicNumber(const char* content, |
return false; |
} |
-static bool CheckForMagicNumbers(const char* content, size_t size, |
- const MagicNumber* magic, size_t magic_len, |
+static bool CheckForMagicNumbers(const char* content, |
+ size_t size, |
+ const MagicNumber* magic, |
+ size_t magic_len, |
base::HistogramBase* counter, |
std::string* result) { |
for (size_t i = 0; i < magic_len; ++i) { |
if (MatchMagicNumber(content, size, magic[i], result)) { |
- if (counter) counter->Add(static_cast<int>(i)); |
+ if (counter) |
+ counter->Add(static_cast<int>(i)); |
return true; |
} |
} |
@@ -418,9 +421,12 @@ static bool SniffForHTML(const char* content, |
arraysize(kSniffableTags)); |
} |
// |pos| now points to first non-whitespace character (or at end). |
- return CheckForMagicNumbers(pos, end - pos, |
- kSniffableTags, arraysize(kSniffableTags), |
- counter, result); |
+ return CheckForMagicNumbers(pos, |
+ end - pos, |
+ kSniffableTags, |
+ arraysize(kSniffableTags), |
+ counter, |
+ result); |
} |
// Returns true and sets result if the content matches any of kMagicNumbers. |
@@ -437,9 +443,8 @@ static bool SniffForMagicNumbers(const char* content, |
counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", |
arraysize(kMagicNumbers)); |
} |
- return CheckForMagicNumbers(content, size, |
- kMagicNumbers, arraysize(kMagicNumbers), |
- counter, result); |
+ return CheckForMagicNumbers( |
+ content, size, kMagicNumbers, arraysize(kMagicNumbers), counter, result); |
} |
// Returns true and sets result if the content matches any of |
@@ -454,9 +459,12 @@ static bool SniffForOfficeDocs(const char* content, |
// Check our table of magic numbers for Office file types. |
std::string office_version; |
- if (!CheckForMagicNumbers(content, size, |
- kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers), |
- NULL, &office_version)) |
+ if (!CheckForMagicNumbers(content, |
+ size, |
+ kOfficeMagicNumbers, |
+ arraysize(kOfficeMagicNumbers), |
+ NULL, |
+ &office_version)) |
return false; |
OfficeDocType type = DOC_TYPE_NONE; |
@@ -469,7 +477,8 @@ static bool SniffForOfficeDocs(const char* content, |
const char* extension = |
&url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; |
- if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension, |
+ if (0 == base::strncasecmp(extension, |
+ kOfficeExtensionTypes[i].extension, |
kOfficeExtensionTypes[i].extension_len)) { |
type = kOfficeExtensionTypes[i].doc_type; |
break; |
@@ -497,16 +506,19 @@ static bool SniffForOfficeDocs(const char* content, |
} else if (office_version == "OOXML") { |
switch (type) { |
case DOC_TYPE_WORD: |
- *result = "application/vnd.openxmlformats-officedocument." |
- "wordprocessingml.document"; |
+ *result = |
+ "application/vnd.openxmlformats-officedocument." |
+ "wordprocessingml.document"; |
return true; |
case DOC_TYPE_EXCEL: |
- *result = "application/vnd.openxmlformats-officedocument." |
- "spreadsheetml.sheet"; |
+ *result = |
+ "application/vnd.openxmlformats-officedocument." |
+ "spreadsheetml.sheet"; |
return true; |
case DOC_TYPE_POWERPOINT: |
- *result = "application/vnd.openxmlformats-officedocument." |
- "presentationml.presentation"; |
+ *result = |
+ "application/vnd.openxmlformats-officedocument." |
+ "presentationml.presentation"; |
return true; |
case DOC_TYPE_NONE: |
NOTREACHED(); |
@@ -522,16 +534,20 @@ static bool IsOfficeType(const std::string& type_hint) { |
return (type_hint == "application/msword" || |
type_hint == "application/vnd.ms-excel" || |
type_hint == "application/vnd.ms-powerpoint" || |
- type_hint == "application/vnd.openxmlformats-officedocument." |
- "wordprocessingml.document" || |
- type_hint == "application/vnd.openxmlformats-officedocument." |
- "spreadsheetml.sheet" || |
- type_hint == "application/vnd.openxmlformats-officedocument." |
- "presentationml.presentation" || |
+ type_hint == |
+ "application/vnd.openxmlformats-officedocument." |
+ "wordprocessingml.document" || |
+ type_hint == |
+ "application/vnd.openxmlformats-officedocument." |
+ "spreadsheetml.sheet" || |
+ type_hint == |
+ "application/vnd.openxmlformats-officedocument." |
+ "presentationml.presentation" || |
type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" || |
type_hint == "application/vnd.ms-word.document.macroenabled.12" || |
- type_hint == "application/vnd.ms-powerpoint.presentation." |
- "macroenabled.12" || |
+ type_hint == |
+ "application/vnd.ms-powerpoint.presentation." |
+ "macroenabled.12" || |
type_hint == "application/mspowerpoint" || |
type_hint == "application/msexcel" || |
type_hint == "application/vnd.ms-word" || |
@@ -557,9 +573,12 @@ static bool SniffForInvalidOfficeDocs(const char* content, |
// Check our table of magic numbers for Office file types. If it does not |
// match one, the MIME type was invalid. Set it instead to a safe value. |
std::string office_version; |
- if (!CheckForMagicNumbers(content, size, |
- kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers), |
- NULL, &office_version)) { |
+ if (!CheckForMagicNumbers(content, |
+ size, |
+ kOfficeMagicNumbers, |
+ arraysize(kOfficeMagicNumbers), |
+ NULL, |
+ &office_version)) { |
*result = "application/octet-stream"; |
} |
@@ -570,14 +589,14 @@ static bool SniffForInvalidOfficeDocs(const char* content, |
// Byte order marks |
static const MagicNumber kMagicXML[] = { |
- // We want to be very conservative in interpreting text/xml content as |
- // XHTML -- we just want to sniff enough to make unit tests pass. |
- // So we match explicitly on this, and don't match other ways of writing |
- // it in semantically-equivalent ways. |
- MAGIC_STRING("application/xhtml+xml", |
- "<html xmlns=\"http://www.w3.org/1999/xhtml\"") |
- MAGIC_STRING("application/atom+xml", "<feed") |
- MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 |
+ // We want to be very conservative in interpreting text/xml content as |
+ // XHTML -- we just want to sniff enough to make unit tests pass. |
+ // So we match explicitly on this, and don't match other ways of writing |
+ // it in semantically-equivalent ways. |
+ MAGIC_STRING("application/xhtml+xml", |
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\"") |
+ MAGIC_STRING("application/atom+xml", "<feed") |
+ MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 |
}; |
// Returns true and sets result if the content appears to contain XHTML or a |
@@ -603,8 +622,8 @@ static bool SniffXML(const char* content, |
// based on the name (or possibly attributes) of that tag. |
static base::HistogramBase* counter(NULL); |
if (!counter) { |
- counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", |
- arraysize(kMagicXML)); |
+ counter = |
+ UMASnifferHistogramGet("mime_sniffer.kMagicXML2", arraysize(kMagicXML)); |
} |
const int kMaxTagIterations = 5; |
for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
@@ -616,16 +635,15 @@ static bool SniffXML(const char* content, |
// Skip XML declarations. |
++pos; |
continue; |
- } else if (base::strncasecmp(pos, "<!DOCTYPE", |
- sizeof("<!DOCTYPE") - 1) == 0) { |
+ } else if (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == |
+ 0) { |
mmenke
2014/10/10 18:12:39
Think this is pretty ugly - I find no extra indent [...]
|
// Skip DOCTYPE declarations. |
++pos; |
continue; |
} |
- if (CheckForMagicNumbers(pos, end - pos, |
- kMagicXML, arraysize(kMagicXML), |
- counter, result)) |
+ if (CheckForMagicNumbers( |
+ pos, end - pos, kMagicXML, arraysize(kMagicXML), counter, result)) |
return true; |
mmenke
2014/10/10 18:12:39
This is a style violation - when an if body takes [...]
|
// TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult |
@@ -644,30 +662,30 @@ static bool SniffXML(const char* content, |
// Byte order marks |
static const MagicNumber kByteOrderMark[] = { |
- MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
- MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
- MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
+ MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
+ MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
+ MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
}; |
// Whether a given byte looks like it might be part of binary content. |
// Source: HTML5 spec |
static char kByteLooksBinary[] = { |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
}; |
// Returns true and sets result to "application/octet-stream" if the content |
@@ -693,9 +711,12 @@ static bool SniffBinary(const char* content, |
arraysize(kByteOrderMark)); |
} |
std::string unused; |
- if (CheckForMagicNumbers(content, size, |
- kByteOrderMark, arraysize(kByteOrderMark), |
- counter, &unused)) { |
+ if (CheckForMagicNumbers(content, |
+ size, |
+ kByteOrderMark, |
+ arraysize(kByteOrderMark), |
+ counter, |
+ &unused)) { |
// If there is BOM, we think the buffer is not binary. |
result->assign("text/plain"); |
return false; |
@@ -722,14 +743,14 @@ static bool IsUnknownMimeType(const std::string& mime_type) { |
// TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here. |
// If we do, please be careful not to alter the semantics at all. |
static const char* kUnknownMimeTypes[] = { |
- // Empty mime types are as unknown as they get. |
- "", |
- // The unknown/unknown type is popular and uninformative |
- "unknown/unknown", |
- // The second most popular unknown mime type is application/unknown |
- "application/unknown", |
- // Firefox rejects a mime type if it is exactly */* |
- "*/*", |
+ // Empty mime types are as unknown as they get. |
+ "", |
+ // The unknown/unknown type is popular and uninformative |
+ "unknown/unknown", |
+ // The second most popular unknown mime type is application/unknown |
+ "application/unknown", |
+ // Firefox rejects a mime type if it is exactly */* |
+ "*/*", |
}; |
static base::HistogramBase* counter(NULL); |
if (!counter) { |
@@ -771,8 +792,7 @@ static bool SniffCRX(const char* content, |
// TODO(aa): If we ever have another magic number, we'll want to pass a |
// histogram into CheckForMagicNumbers(), below, to see which one matched. |
static const struct MagicNumber kCRXMagicNumbers[] = { |
- MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00") |
- }; |
+ MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")}; |
// Only consider files that have the extension ".crx". |
static const char kCRXExtension[] = ".crx"; |
@@ -786,9 +806,12 @@ static bool SniffCRX(const char* content, |
} |
*have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); |
- if (CheckForMagicNumbers(content, size, |
- kCRXMagicNumbers, arraysize(kCRXMagicNumbers), |
- NULL, result)) { |
+ if (CheckForMagicNumbers(content, |
+ size, |
+ kCRXMagicNumbers, |
+ arraysize(kCRXMagicNumbers), |
+ NULL, |
+ result)) { |
counter->Add(2); |
} else { |
return false; |
@@ -803,46 +826,50 @@ bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |
should_sniff_counter = |
UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); |
} |
- bool sniffable_scheme = url.is_empty() || |
- url.SchemeIsHTTPOrHTTPS() || |
+ bool sniffable_scheme = url.is_empty() || url.SchemeIsHTTPOrHTTPS() || |
url.SchemeIs("ftp") || |
#if defined(OS_ANDROID) |
url.SchemeIs("content") || |
#endif |
- url.SchemeIsFile() || |
- url.SchemeIsFileSystem(); |
+ url.SchemeIsFile() || url.SchemeIsFileSystem(); |
if (!sniffable_scheme) { |
should_sniff_counter->Add(1); |
return false; |
} |
- static const char* kSniffableTypes[] = { |
- // Many web servers are misconfigured to send text/plain for many |
- // different types of content. |
- "text/plain", |
- // We want to sniff application/octet-stream for |
- // application/x-chrome-extension, but nothing else. |
- "application/octet-stream", |
- // XHTML and Atom/RSS feeds are often served as plain xml instead of |
- // their more specific mime types. |
- "text/xml", |
- "application/xml", |
- // Check for false Microsoft Office MIME types. |
- "application/msword", |
- "application/vnd.ms-excel", |
- "application/vnd.ms-powerpoint", |
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
- "application/vnd.openxmlformats-officedocument.presentationml.presentation", |
- "application/vnd.ms-excel.sheet.macroenabled.12", |
- "application/vnd.ms-word.document.macroenabled.12", |
- "application/vnd.ms-powerpoint.presentation.macroenabled.12", |
- "application/mspowerpoint", |
- "application/msexcel", |
- "application/vnd.ms-word", |
- "application/vnd.ms-word.document.12", |
- "application/vnd.msword", |
- }; |
+ static const char* |
+ kSniffableTypes |
+ [] = {// Many web servers are misconfigured to send text/plain for |
mmenke
2014/10/10 18:12:39
Just no.
|
+ // many |
+ // different types of content. |
+ "text/plain", |
+ // We want to sniff application/octet-stream for |
+ // application/x-chrome-extension, but nothing else. |
+ "application/octet-stream", |
+ // XHTML and Atom/RSS feeds are often served as plain xml |
+ // instead of |
+ // their more specific mime types. |
+ "text/xml", |
+ "application/xml", |
+ // Check for false Microsoft Office MIME types. |
+ "application/msword", |
+ "application/vnd.ms-excel", |
+ "application/vnd.ms-powerpoint", |
+ "application/" |
+ "vnd.openxmlformats-officedocument.wordprocessingml.document", |
+ "application/" |
+ "vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
+ "application/" |
+ "vnd.openxmlformats-officedocument.presentationml.presentation", |
+ "application/vnd.ms-excel.sheet.macroenabled.12", |
+ "application/vnd.ms-word.document.macroenabled.12", |
+ "application/vnd.ms-powerpoint.presentation.macroenabled.12", |
+ "application/mspowerpoint", |
+ "application/msexcel", |
+ "application/vnd.ms-word", |
+ "application/vnd.ms-word.document.12", |
+ "application/vnd.msword", |
+ }; |
static base::HistogramBase* counter(NULL); |
if (!counter) { |
counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", |
@@ -930,15 +957,15 @@ bool SniffMimeType(const char* content, |
// CRX files (Chrome extensions) have a special sniffing algorithm. It is |
// tighter than the others because we don't have to match legacy behavior. |
- if (SniffCRX(content, content_size, url, type_hint, |
- &have_enough_content, result)) |
+ if (SniffCRX( |
+ content, content_size, url, type_hint, &have_enough_content, result)) |
return true; |
// Check the file extension and magic numbers to see if this is an Office |
// document. This needs to be checked before the general magic numbers |
// because zip files and Office documents (OOXML) have the same magic number. |
- if (SniffForOfficeDocs(content, content_size, url, |
- &have_enough_content, result)) |
+ if (SniffForOfficeDocs( |
+ content, content_size, url, &have_enough_content, result)) |
return true; // We've matched a magic number. No more content needed. |
// We're not interested in sniffing for magic numbers when the type_hint |
@@ -948,8 +975,7 @@ bool SniffMimeType(const char* content, |
// Now we look in our large table of magic numbers to see if we can find |
// anything that matches the content. |
- if (SniffForMagicNumbers(content, content_size, |
- &have_enough_content, result)) |
+ if (SniffForMagicNumbers(content, content_size, &have_enough_content, result)) |
return true; // We've matched a magic number. No more content needed. |
return have_enough_content; |
@@ -959,12 +985,16 @@ bool SniffMimeTypeFromLocalData(const char* content, |
size_t size, |
std::string* result) { |
// First check the extra table. |
- if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
- arraysize(kExtraMagicNumbers), NULL, result)) |
+ if (CheckForMagicNumbers(content, |
+ size, |
+ kExtraMagicNumbers, |
+ arraysize(kExtraMagicNumbers), |
+ NULL, |
+ result)) |
return true; |
// Finally check the original table. |
- return CheckForMagicNumbers(content, size, kMagicNumbers, |
- arraysize(kMagicNumbers), NULL, result); |
+ return CheckForMagicNumbers( |
+ content, size, kMagicNumbers, arraysize(kMagicNumbers), NULL, result); |
} |
} // namespace net |