Chromium Code Reviews | Index: net/base/mime_sniffer.cc |
| diff --git a/net/base/mime_sniffer.cc b/net/base/mime_sniffer.cc |
| index 61ef948211059db1857f10df3cf5b9386a7eaaf3..3481d6a09a9fe4671d953edd3aa636a34991f1b0 100644 |
| --- a/net/base/mime_sniffer.cc |
| +++ b/net/base/mime_sniffer.cc |
| @@ -117,77 +117,82 @@ struct MagicNumber { |
| const char* mask; // if set, must have same length as |magic| |
| }; |
|
davidben
2014/10/10 20:24:16
Okay, the tables in this file are pretty badly mes… [comment truncated in capture]
|
| -#define MAGIC_NUMBER(mime_type, magic) \ |
| - { (mime_type), (magic), sizeof(magic)-1, false, NULL }, |
| +#define MAGIC_NUMBER(mime_type, magic) \ |
| + { (mime_type), (magic), sizeof(magic) - 1, false, NULL } \ |
| + , |
| template <int MagicSize, int MaskSize> |
| class VerifySizes { |
| COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal); |
| + |
| public: |
| enum { SIZES = MagicSize }; |
| }; |
| #define verified_sizeof(magic, mask) \ |
| -VerifySizes<sizeof(magic), sizeof(mask)>::SIZES |
| + VerifySizes<sizeof(magic), sizeof(mask)>::SIZES |
| -#define MAGIC_MASK(mime_type, magic, mask) \ |
| - { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) }, |
| +#define MAGIC_MASK(mime_type, magic, mask) \ |
| + { (mime_type), (magic), verified_sizeof(magic, mask) - 1, false, (mask) } \ |
| + , |
| // Magic strings are case insensitive and must not include '\0' characters |
| -#define MAGIC_STRING(mime_type, magic) \ |
| - { (mime_type), (magic), sizeof(magic)-1, true, NULL }, |
| +#define MAGIC_STRING(mime_type, magic) \ |
| + { (mime_type), (magic), sizeof(magic) - 1, true, NULL } \ |
| + , |
| static const MagicNumber kMagicNumbers[] = { |
| - // Source: HTML 5 specification |
| - MAGIC_NUMBER("application/pdf", "%PDF-") |
| - MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
| - MAGIC_NUMBER("image/gif", "GIF87a") |
| - MAGIC_NUMBER("image/gif", "GIF89a") |
| - MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") |
| - MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") |
| - MAGIC_NUMBER("image/bmp", "BM") |
| - // Source: Mozilla |
| - MAGIC_NUMBER("text/plain", "#!") // Script |
| - MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS |
| - MAGIC_NUMBER("text/plain", "From") |
| - MAGIC_NUMBER("text/plain", ">From") |
| - // Chrome specific |
| - MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") |
| - MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") |
| - MAGIC_NUMBER("video/x-ms-asf", |
| - "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") |
| - MAGIC_NUMBER("image/tiff", "I I") |
| - MAGIC_NUMBER("image/tiff", "II*") |
| - MAGIC_NUMBER("image/tiff", "MM\x00*") |
| - MAGIC_NUMBER("audio/mpeg", "ID3") |
| - MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") |
| - MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3") |
| - // TODO(abarth): we don't handle partial byte matches yet |
| - // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") |
| - // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") |
| - // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") |
| - MAGIC_NUMBER("application/zip", "PK\x03\x04") |
| - MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") |
| - MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") |
| - MAGIC_NUMBER("application/octet-stream", "MZ") // EXE |
| - // Sniffing for Flash: |
| - // |
| - // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
| - // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") |
| - // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
| - // |
| - // Including these magic number for Flash is a trade off. |
| - // |
| - // Pros: |
| - // * Flash is an important and popular file format |
| - // |
| - // Cons: |
| - // * These patterns are fairly weak |
| - // * If we mistakenly decide something is Flash, we will execute it |
| - // in the origin of an unsuspecting site. This could be a security |
| - // vulnerability if the site allows users to upload content. |
| - // |
| - // On balance, we do not include these patterns. |
| + // Source: HTML 5 specification |
| + MAGIC_NUMBER("application/pdf", |
| + "%PDF-") MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
| + MAGIC_NUMBER("image/gif", "GIF87a") MAGIC_NUMBER("image/gif", "GIF89a") |
| + MAGIC_NUMBER("image/png", |
| + "\x89" |
| + "PNG\x0D\x0A\x1A\x0A") |
| + MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") |
| + MAGIC_NUMBER("image/bmp", "BM") |
| + // Source: Mozilla |
| + MAGIC_NUMBER("text/plain", "#!") // Script |
| + MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS |
| + MAGIC_NUMBER("text/plain", "From") MAGIC_NUMBER("text/plain", ">From") |
| + // Chrome specific |
| + MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") |
| + MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") MAGIC_NUMBER( |
| + "video/x-ms-asf", |
| + "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") |
| + MAGIC_NUMBER("image/tiff", "I I") MAGIC_NUMBER( |
| + "image/tiff", |
| + "II*") MAGIC_NUMBER("image/tiff", |
| + "MM\x00*") MAGIC_NUMBER("audio/mpeg", "ID3") |
| + MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") MAGIC_NUMBER( |
| + "video/webm", |
| + "\x1A\x45\xDF\xA3") |
| + // TODO(abarth): we don't handle partial byte matches yet |
| + // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") |
| + // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") |
| + // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") |
| + MAGIC_NUMBER("application/zip", "PK\x03\x04") |
| + MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") |
| + MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") |
| + MAGIC_NUMBER("application/octet-stream", "MZ") // EXE |
| + // Sniffing for Flash: |
| + // |
| + // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
| + // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") |
| + // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
| + // |
| + // Including these magic number for Flash is a trade off. |
| + // |
| + // Pros: |
| + // * Flash is an important and popular file format |
| + // |
| + // Cons: |
| + // * These patterns are fairly weak |
| + // * If we mistakenly decide something is Flash, we will execute it |
| + // in the origin of an unsuspecting site. This could be a security |
| + // vulnerability if the site allows users to upload content. |
| + // |
| + // On balance, we do not include these patterns. |
| }; |
| // The number of content bytes we need to use all our Microsoft Office magic |
| @@ -195,9 +200,8 @@ static const MagicNumber kMagicNumbers[] = { |
| static const size_t kBytesRequiredForOfficeMagic = 8; |
| static const MagicNumber kOfficeMagicNumbers[] = { |
| - MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1") |
| - MAGIC_NUMBER("OOXML", "PK\x03\x04") |
| -}; |
| + MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1") |
| + MAGIC_NUMBER("OOXML", "PK\x03\x04")}; |
| enum OfficeDocType { |
| DOC_TYPE_WORD, |
| @@ -212,90 +216,92 @@ struct OfficeExtensionType { |
| size_t extension_len; |
| }; |
| -#define OFFICE_EXTENSION(type, extension) \ |
| - { (type), (extension), sizeof(extension) - 1 }, |
| +#define OFFICE_EXTENSION(type, extension) \ |
| + { (type), (extension), sizeof(extension) - 1 } \ |
| + , |
| static const OfficeExtensionType kOfficeExtensionTypes[] = { |
| - OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc") |
| - OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls") |
| - OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt") |
| - OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx") |
| - OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx") |
| - OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx") |
| -}; |
| + OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc") |
| + OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls") |
| + OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt") |
| + OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx") |
| + OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx") |
| + OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")}; |
| static const MagicNumber kExtraMagicNumbers[] = { |
| - MAGIC_NUMBER("image/x-xbitmap", "#define") |
| - MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00") |
| - MAGIC_NUMBER("image/svg+xml", "<?xml_version=") |
| - MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") |
| - MAGIC_NUMBER("video/avi", "RIFF....AVI LIST") |
| - MAGIC_NUMBER("audio/ogg", "OggS") |
| - MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") |
| - MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") |
| - MAGIC_NUMBER("video/3gpp", "....ftyp3g") |
| - MAGIC_NUMBER("video/3gpp", "....ftypavcl") |
| - MAGIC_NUMBER("video/mp4", "....ftyp") |
| - MAGIC_NUMBER("video/quicktime", "....moov") |
| - MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
| - MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
| - MAGIC_NUMBER("video/x-flv", "FLV") |
| - MAGIC_NUMBER("audio/x-flac", "fLaC") |
| - |
| - // RAW image types. |
| - MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR") |
| - MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR") |
| - MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM") |
| - MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian |
| - MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian |
| - MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian |
| - MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ") |
| - MAGIC_NUMBER("image/x-panasonic-raw", |
| - "IIU\x00\x08\x00\x00\x00") // Panasonic .raw |
| - MAGIC_NUMBER("image/x-panasonic-raw", |
| - "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2 |
| - MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw") |
| - MAGIC_NUMBER("image/x-x3f", "FOVb") |
| -}; |
| + MAGIC_NUMBER("image/x-xbitmap", "#define") MAGIC_NUMBER( |
| + "image/x-icon", |
| + "\x00\x00\x01\x00") MAGIC_NUMBER("image/svg+xml", "<?xml_version=") |
| + MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") MAGIC_NUMBER( |
| + "video/avi", |
| + "RIFF....AVI LIST") MAGIC_NUMBER("audio/ogg", "OggS") |
| + MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") |
| + MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") MAGIC_NUMBER( |
| + "video/3gpp", |
| + "....ftyp3g") MAGIC_NUMBER("video/3gpp", "....ftypavcl") |
| + MAGIC_NUMBER("video/mp4", "....ftyp") |
| + MAGIC_NUMBER("video/quicktime", "....moov") |
| + MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
| + MAGIC_NUMBER("application/x-shockwave-flash", |
| + "FWS") |
| + MAGIC_NUMBER("video/x-flv", "FLV") |
| + MAGIC_NUMBER("audio/x-flac", "fLaC") |
| + |
| + // RAW image types. |
| + MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR") |
| + MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR") |
| + MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM") |
| + MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian |
| + MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian |
| + MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian |
| + MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ") |
| + MAGIC_NUMBER("image/x-panasonic-raw", |
| + "IIU\x00\x08\x00\x00\x00") // Panasonic .raw |
| + MAGIC_NUMBER("image/x-panasonic-raw", |
| + "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2 |
| + MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw") |
| + MAGIC_NUMBER("image/x-x3f", "FOVb")}; |
| // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will |
| // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is |
| // HTML, but we will not. |
| -#define MAGIC_HTML_TAG(tag) \ |
| - MAGIC_STRING("text/html", "<" tag) |
| +#define MAGIC_HTML_TAG(tag) MAGIC_STRING("text/html", "<" tag) |
| static const MagicNumber kSniffableTags[] = { |
| - // XML processing directive. Although this is not an HTML mime type, we sniff |
| - // for this in the HTML phase because text/xml is just as powerful as HTML and |
| - // we want to leverage our white space skipping technology. |
| - MAGIC_NUMBER("text/xml", "<?xml") // Mozilla |
| - // DOCTYPEs |
| - MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec |
| - // Sniffable tags, ordered by how often they occur in sniffable documents. |
| - MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla |
| - MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla |
| - MAGIC_HTML_TAG("!--") |
| - MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla |
| - MAGIC_HTML_TAG("iframe") // Mozilla |
| - MAGIC_HTML_TAG("h1") // Mozilla |
| - MAGIC_HTML_TAG("div") // Mozilla |
| - MAGIC_HTML_TAG("font") // Mozilla |
| - MAGIC_HTML_TAG("table") // Mozilla |
| - MAGIC_HTML_TAG("a") // Mozilla |
| - MAGIC_HTML_TAG("style") // Mozilla |
| - MAGIC_HTML_TAG("title") // Mozilla |
| - MAGIC_HTML_TAG("b") // Mozilla |
| - MAGIC_HTML_TAG("body") // Mozilla |
| - MAGIC_HTML_TAG("br") |
| - MAGIC_HTML_TAG("p") // Mozilla |
| + // XML processing directive. Although this is not an HTML mime type, we |
| + // sniff |
| + // for this in the HTML phase because text/xml is just as powerful as HTML |
| + // and |
| + // we want to leverage our white space skipping technology. |
| + MAGIC_NUMBER("text/xml", "<?xml") // Mozilla |
| + // DOCTYPEs |
| + MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec |
| + // Sniffable tags, ordered by how often they occur in sniffable documents. |
| + MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla |
| + MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla |
| + MAGIC_HTML_TAG("!--") MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla |
| + MAGIC_HTML_TAG("iframe") // Mozilla |
| + MAGIC_HTML_TAG("h1") // Mozilla |
| + MAGIC_HTML_TAG("div") // Mozilla |
| + MAGIC_HTML_TAG("font") // Mozilla |
| + MAGIC_HTML_TAG("table") // Mozilla |
| + MAGIC_HTML_TAG("a") // Mozilla |
| + MAGIC_HTML_TAG("style") // Mozilla |
| + MAGIC_HTML_TAG("title") // Mozilla |
| + MAGIC_HTML_TAG("b") // Mozilla |
| + MAGIC_HTML_TAG("body") // Mozilla |
| + MAGIC_HTML_TAG("br") MAGIC_HTML_TAG("p") // Mozilla |
| }; |
| static base::HistogramBase* UMASnifferHistogramGet(const char* name, |
| int array_size) { |
| - base::HistogramBase* counter = |
| - base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size, |
| - base::HistogramBase::kUmaTargetedHistogramFlag); |
| + base::HistogramBase* counter = base::LinearHistogram::FactoryGet( |
| + name, |
| + 1, |
| + array_size - 1, |
| + array_size, |
| + base::HistogramBase::kUmaTargetedHistogramFlag); |
| return counter; |
| } |
| @@ -368,13 +374,16 @@ static bool MatchMagicNumber(const char* content, |
| return false; |
| } |
| -static bool CheckForMagicNumbers(const char* content, size_t size, |
| - const MagicNumber* magic, size_t magic_len, |
| +static bool CheckForMagicNumbers(const char* content, |
| + size_t size, |
| + const MagicNumber* magic, |
| + size_t magic_len, |
| base::HistogramBase* counter, |
| std::string* result) { |
| for (size_t i = 0; i < magic_len; ++i) { |
| if (MatchMagicNumber(content, size, magic[i], result)) { |
| - if (counter) counter->Add(static_cast<int>(i)); |
| + if (counter) |
| + counter->Add(static_cast<int>(i)); |
| return true; |
| } |
| } |
| @@ -418,9 +427,12 @@ static bool SniffForHTML(const char* content, |
| arraysize(kSniffableTags)); |
| } |
| // |pos| now points to first non-whitespace character (or at end). |
| - return CheckForMagicNumbers(pos, end - pos, |
| - kSniffableTags, arraysize(kSniffableTags), |
| - counter, result); |
| + return CheckForMagicNumbers(pos, |
| + end - pos, |
| + kSniffableTags, |
| + arraysize(kSniffableTags), |
| + counter, |
| + result); |
| } |
| // Returns true and sets result if the content matches any of kMagicNumbers. |
| @@ -437,9 +449,8 @@ static bool SniffForMagicNumbers(const char* content, |
| counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", |
| arraysize(kMagicNumbers)); |
| } |
| - return CheckForMagicNumbers(content, size, |
| - kMagicNumbers, arraysize(kMagicNumbers), |
| - counter, result); |
| + return CheckForMagicNumbers( |
| + content, size, kMagicNumbers, arraysize(kMagicNumbers), counter, result); |
| } |
| // Returns true and sets result if the content matches any of |
| @@ -454,9 +465,12 @@ static bool SniffForOfficeDocs(const char* content, |
| // Check our table of magic numbers for Office file types. |
| std::string office_version; |
| - if (!CheckForMagicNumbers(content, size, |
| - kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers), |
| - NULL, &office_version)) |
| + if (!CheckForMagicNumbers(content, |
| + size, |
| + kOfficeMagicNumbers, |
| + arraysize(kOfficeMagicNumbers), |
| + NULL, |
| + &office_version)) |
| return false; |
| OfficeDocType type = DOC_TYPE_NONE; |
| @@ -469,7 +483,8 @@ static bool SniffForOfficeDocs(const char* content, |
| const char* extension = |
| &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; |
| - if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension, |
| + if (0 == base::strncasecmp(extension, |
| + kOfficeExtensionTypes[i].extension, |
| kOfficeExtensionTypes[i].extension_len)) { |
| type = kOfficeExtensionTypes[i].doc_type; |
| break; |
| @@ -497,16 +512,19 @@ static bool SniffForOfficeDocs(const char* content, |
| } else if (office_version == "OOXML") { |
| switch (type) { |
| case DOC_TYPE_WORD: |
| - *result = "application/vnd.openxmlformats-officedocument." |
| - "wordprocessingml.document"; |
| + *result = |
| + "application/vnd.openxmlformats-officedocument." |
| + "wordprocessingml.document"; |
| return true; |
| case DOC_TYPE_EXCEL: |
| - *result = "application/vnd.openxmlformats-officedocument." |
| - "spreadsheetml.sheet"; |
| + *result = |
| + "application/vnd.openxmlformats-officedocument." |
| + "spreadsheetml.sheet"; |
| return true; |
| case DOC_TYPE_POWERPOINT: |
| - *result = "application/vnd.openxmlformats-officedocument." |
| - "presentationml.presentation"; |
| + *result = |
| + "application/vnd.openxmlformats-officedocument." |
| + "presentationml.presentation"; |
| return true; |
| case DOC_TYPE_NONE: |
| NOTREACHED(); |
| @@ -522,16 +540,20 @@ static bool IsOfficeType(const std::string& type_hint) { |
| return (type_hint == "application/msword" || |
| type_hint == "application/vnd.ms-excel" || |
| type_hint == "application/vnd.ms-powerpoint" || |
| - type_hint == "application/vnd.openxmlformats-officedocument." |
| - "wordprocessingml.document" || |
| - type_hint == "application/vnd.openxmlformats-officedocument." |
| - "spreadsheetml.sheet" || |
| - type_hint == "application/vnd.openxmlformats-officedocument." |
| - "presentationml.presentation" || |
| + type_hint == |
| + "application/vnd.openxmlformats-officedocument." |
| + "wordprocessingml.document" || |
| + type_hint == |
| + "application/vnd.openxmlformats-officedocument." |
| + "spreadsheetml.sheet" || |
| + type_hint == |
| + "application/vnd.openxmlformats-officedocument." |
| + "presentationml.presentation" || |
| type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" || |
| type_hint == "application/vnd.ms-word.document.macroenabled.12" || |
| - type_hint == "application/vnd.ms-powerpoint.presentation." |
| - "macroenabled.12" || |
| + type_hint == |
| + "application/vnd.ms-powerpoint.presentation." |
| + "macroenabled.12" || |
| type_hint == "application/mspowerpoint" || |
| type_hint == "application/msexcel" || |
| type_hint == "application/vnd.ms-word" || |
| @@ -557,9 +579,12 @@ static bool SniffForInvalidOfficeDocs(const char* content, |
| // Check our table of magic numbers for Office file types. If it does not |
| // match one, the MIME type was invalid. Set it instead to a safe value. |
| std::string office_version; |
| - if (!CheckForMagicNumbers(content, size, |
| - kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers), |
| - NULL, &office_version)) { |
| + if (!CheckForMagicNumbers(content, |
| + size, |
| + kOfficeMagicNumbers, |
| + arraysize(kOfficeMagicNumbers), |
| + NULL, |
| + &office_version)) { |
| *result = "application/octet-stream"; |
| } |
| @@ -570,14 +595,14 @@ static bool SniffForInvalidOfficeDocs(const char* content, |
| // Byte order marks |
| static const MagicNumber kMagicXML[] = { |
| - // We want to be very conservative in interpreting text/xml content as |
| - // XHTML -- we just want to sniff enough to make unit tests pass. |
| - // So we match explicitly on this, and don't match other ways of writing |
| - // it in semantically-equivalent ways. |
| - MAGIC_STRING("application/xhtml+xml", |
| - "<html xmlns=\"http://www.w3.org/1999/xhtml\"") |
| - MAGIC_STRING("application/atom+xml", "<feed") |
| - MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 |
| + // We want to be very conservative in interpreting text/xml content as |
| + // XHTML -- we just want to sniff enough to make unit tests pass. |
| + // So we match explicitly on this, and don't match other ways of writing |
| + // it in semantically-equivalent ways. |
| + MAGIC_STRING("application/xhtml+xml", |
| + "<html xmlns=\"http://www.w3.org/1999/xhtml\"") |
| + MAGIC_STRING("application/atom+xml", "<feed") |
| + MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 |
| }; |
| // Returns true and sets result if the content appears to contain XHTML or a |
| @@ -603,8 +628,8 @@ static bool SniffXML(const char* content, |
| // based on the name (or possibly attributes) of that tag. |
| static base::HistogramBase* counter(NULL); |
| if (!counter) { |
| - counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", |
| - arraysize(kMagicXML)); |
| + counter = |
| + UMASnifferHistogramGet("mime_sniffer.kMagicXML2", arraysize(kMagicXML)); |
| } |
| const int kMaxTagIterations = 5; |
| for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
| @@ -625,9 +650,8 @@ static bool SniffXML(const char* content, |
| continue; |
| } |
| - if (CheckForMagicNumbers(pos, end - pos, |
| - kMagicXML, arraysize(kMagicXML), |
| - counter, result)) |
| + if (CheckForMagicNumbers( |
| + pos, end - pos, kMagicXML, arraysize(kMagicXML), counter, result)) |
| return true; |
| // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult |
| @@ -646,30 +670,30 @@ static bool SniffXML(const char* content, |
| // Byte order marks |
| static const MagicNumber kByteOrderMark[] = { |
| - MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
| - MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
| - MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
| + MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
| + MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
| + MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
| }; |
| // Whether a given byte looks like it might be part of binary content. |
| // Source: HTML5 spec |
| static char kByteLooksBinary[] = { |
| - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
| - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
| + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
|
davidben
2014/10/10 20:24:16
Is this a clang-format bug? google-c-style.el says… [comment truncated in capture]
|
| + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
| }; |
| // Returns true and sets result to "application/octet-stream" if the content |
| @@ -695,9 +719,12 @@ static bool SniffBinary(const char* content, |
| arraysize(kByteOrderMark)); |
| } |
| std::string unused; |
| - if (CheckForMagicNumbers(content, size, |
| - kByteOrderMark, arraysize(kByteOrderMark), |
| - counter, &unused)) { |
| + if (CheckForMagicNumbers(content, |
| + size, |
| + kByteOrderMark, |
| + arraysize(kByteOrderMark), |
| + counter, |
| + &unused)) { |
| // If there is BOM, we think the buffer is not binary. |
| result->assign("text/plain"); |
| return false; |
| @@ -724,14 +751,14 @@ static bool IsUnknownMimeType(const std::string& mime_type) { |
| // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here. |
| // If we do, please be careful not to alter the semantics at all. |
| static const char* kUnknownMimeTypes[] = { |
| - // Empty mime types are as unknown as they get. |
| - "", |
| - // The unknown/unknown type is popular and uninformative |
| - "unknown/unknown", |
| - // The second most popular unknown mime type is application/unknown |
| - "application/unknown", |
| - // Firefox rejects a mime type if it is exactly */* |
| - "*/*", |
| + // Empty mime types are as unknown as they get. |
| + "", |
| + // The unknown/unknown type is popular and uninformative |
| + "unknown/unknown", |
| + // The second most popular unknown mime type is application/unknown |
| + "application/unknown", |
| + // Firefox rejects a mime type if it is exactly */* |
| + "*/*", |
| }; |
| static base::HistogramBase* counter(NULL); |
| if (!counter) { |
| @@ -773,8 +800,7 @@ static bool SniffCRX(const char* content, |
| // TODO(aa): If we ever have another magic number, we'll want to pass a |
| // histogram into CheckForMagicNumbers(), below, to see which one matched. |
| static const struct MagicNumber kCRXMagicNumbers[] = { |
| - MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00") |
| - }; |
| + MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")}; |
|
davidben
2014/10/10 20:24:16
I feel like the newline between 776 and 777 should… [comment truncated in capture]
|
| // Only consider files that have the extension ".crx". |
| static const char kCRXExtension[] = ".crx"; |
| @@ -788,9 +814,12 @@ static bool SniffCRX(const char* content, |
| } |
| *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); |
| - if (CheckForMagicNumbers(content, size, |
| - kCRXMagicNumbers, arraysize(kCRXMagicNumbers), |
| - NULL, result)) { |
| + if (CheckForMagicNumbers(content, |
| + size, |
| + kCRXMagicNumbers, |
| + arraysize(kCRXMagicNumbers), |
| + NULL, |
| + result)) { |
| counter->Add(2); |
| } else { |
| return false; |
| @@ -805,45 +834,44 @@ bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |
| should_sniff_counter = |
| UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); |
| } |
| - bool sniffable_scheme = url.is_empty() || |
| - url.SchemeIsHTTPOrHTTPS() || |
| + bool sniffable_scheme = url.is_empty() || url.SchemeIsHTTPOrHTTPS() || |
| url.SchemeIs("ftp") || |
| #if defined(OS_ANDROID) |
| url.SchemeIs("content") || |
| #endif |
| - url.SchemeIsFile() || |
| - url.SchemeIsFileSystem(); |
| + url.SchemeIsFile() || url.SchemeIsFileSystem(); |
| if (!sniffable_scheme) { |
| should_sniff_counter->Add(1); |
| return false; |
| } |
| static const char* kSniffableTypes[] = { |
| - // Many web servers are misconfigured to send text/plain for many |
| - // different types of content. |
| - "text/plain", |
| - // We want to sniff application/octet-stream for |
| - // application/x-chrome-extension, but nothing else. |
| - "application/octet-stream", |
| - // XHTML and Atom/RSS feeds are often served as plain xml instead of |
| - // their more specific mime types. |
| - "text/xml", |
| - "application/xml", |
| - // Check for false Microsoft Office MIME types. |
| - "application/msword", |
| - "application/vnd.ms-excel", |
| - "application/vnd.ms-powerpoint", |
| - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |
| - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
| - "application/vnd.openxmlformats-officedocument.presentationml.presentation", |
| - "application/vnd.ms-excel.sheet.macroenabled.12", |
| - "application/vnd.ms-word.document.macroenabled.12", |
| - "application/vnd.ms-powerpoint.presentation.macroenabled.12", |
| - "application/mspowerpoint", |
| - "application/msexcel", |
| - "application/vnd.ms-word", |
| - "application/vnd.ms-word.document.12", |
| - "application/vnd.msword", |
| + // Many web servers are misconfigured to send text/plain for many |
| + // different types of content. |
| + "text/plain", |
| + // We want to sniff application/octet-stream for |
| + // application/x-chrome-extension, but nothing else. |
| + "application/octet-stream", |
| + // XHTML and Atom/RSS feeds are often served as plain xml instead of |
| + // their more specific mime types. |
| + "text/xml", |
| + "application/xml", |
| + // Check for false Microsoft Office MIME types. |
| + "application/msword", |
| + "application/vnd.ms-excel", |
| + "application/vnd.ms-powerpoint", |
| + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |
| + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
| + "application/" |
| + "vnd.openxmlformats-officedocument.presentationml.presentation", |
|
davidben
2014/10/10 20:24:16
This is kind of unfortunate. Worth a clang-format… [comment truncated in capture]
|
| + "application/vnd.ms-excel.sheet.macroenabled.12", |
| + "application/vnd.ms-word.document.macroenabled.12", |
| + "application/vnd.ms-powerpoint.presentation.macroenabled.12", |
| + "application/mspowerpoint", |
| + "application/msexcel", |
| + "application/vnd.ms-word", |
| + "application/vnd.ms-word.document.12", |
| + "application/vnd.msword", |
| }; |
| static base::HistogramBase* counter(NULL); |
| if (!counter) { |
| @@ -932,15 +960,15 @@ bool SniffMimeType(const char* content, |
| // CRX files (Chrome extensions) have a special sniffing algorithm. It is |
| // tighter than the others because we don't have to match legacy behavior. |
| - if (SniffCRX(content, content_size, url, type_hint, |
| - &have_enough_content, result)) |
| + if (SniffCRX( |
| + content, content_size, url, type_hint, &have_enough_content, result)) |
| return true; |
| // Check the file extension and magic numbers to see if this is an Office |
| // document. This needs to be checked before the general magic numbers |
| // because zip files and Office documents (OOXML) have the same magic number. |
| - if (SniffForOfficeDocs(content, content_size, url, |
| - &have_enough_content, result)) |
| + if (SniffForOfficeDocs( |
| + content, content_size, url, &have_enough_content, result)) |
| return true; // We've matched a magic number. No more content needed. |
| // We're not interested in sniffing for magic numbers when the type_hint |
| @@ -950,8 +978,7 @@ bool SniffMimeType(const char* content, |
| // Now we look in our large table of magic numbers to see if we can find |
| // anything that matches the content. |
| - if (SniffForMagicNumbers(content, content_size, |
| - &have_enough_content, result)) |
| + if (SniffForMagicNumbers(content, content_size, &have_enough_content, result)) |
| return true; // We've matched a magic number. No more content needed. |
| return have_enough_content; |
| @@ -961,12 +988,16 @@ bool SniffMimeTypeFromLocalData(const char* content, |
| size_t size, |
| std::string* result) { |
| // First check the extra table. |
| - if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
| - arraysize(kExtraMagicNumbers), NULL, result)) |
| + if (CheckForMagicNumbers(content, |
| + size, |
| + kExtraMagicNumbers, |
| + arraysize(kExtraMagicNumbers), |
| + NULL, |
| + result)) |
| return true; |
| // Finally check the original table. |
| - return CheckForMagicNumbers(content, size, kMagicNumbers, |
| - arraysize(kMagicNumbers), NULL, result); |
| + return CheckForMagicNumbers( |
| + content, size, kMagicNumbers, arraysize(kMagicNumbers), NULL, result); |
| } |
| } // namespace net |