Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 109 // to increase this number if you add a longer magic number. | 109 // to increase this number if you add a longer magic number. |
| 110 static const size_t kBytesRequiredForMagic = 42; | 110 static const size_t kBytesRequiredForMagic = 42; |
| 111 | 111 |
| 112 struct MagicNumber { | 112 struct MagicNumber { |
| 113 const char* mime_type; | 113 const char* mime_type; |
| 114 const char* magic; | 114 const char* magic; |
| 115 size_t magic_len; | 115 size_t magic_len; |
| 116 bool is_string; | 116 bool is_string; |
| 117 const char* mask; // if set, must have same length as |magic| | 117 const char* mask; // if set, must have same length as |magic| |
| 118 }; | 118 }; |
| 119 | 119 |
|
davidben
2014/10/10 20:24:16
Okay, the tables in this file are pretty badly mes
| |
| 120 #define MAGIC_NUMBER(mime_type, magic) \ | 120 #define MAGIC_NUMBER(mime_type, magic) \ |
| 121 { (mime_type), (magic), sizeof(magic)-1, false, NULL }, | 121 { (mime_type), (magic), sizeof(magic) - 1, false, NULL } \ |
| 122 , | |
| 122 | 123 |
| 123 template <int MagicSize, int MaskSize> | 124 template <int MagicSize, int MaskSize> |
| 124 class VerifySizes { | 125 class VerifySizes { |
| 125 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal); | 126 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal); |
| 127 | |
| 126 public: | 128 public: |
| 127 enum { SIZES = MagicSize }; | 129 enum { SIZES = MagicSize }; |
| 128 }; | 130 }; |
| 129 | 131 |
| 130 #define verified_sizeof(magic, mask) \ | 132 #define verified_sizeof(magic, mask) \ |
| 131 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES | 133 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES |
| 132 | 134 |
| 133 #define MAGIC_MASK(mime_type, magic, mask) \ | 135 #define MAGIC_MASK(mime_type, magic, mask) \ |
| 134 { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) }, | 136 { (mime_type), (magic), verified_sizeof(magic, mask) - 1, false, (mask) } \ |
| 137 , | |
| 135 | 138 |
| 136 // Magic strings are case insensitive and must not include '\0' characters | 139 // Magic strings are case insensitive and must not include '\0' characters |
| 137 #define MAGIC_STRING(mime_type, magic) \ | 140 #define MAGIC_STRING(mime_type, magic) \ |
| 138 { (mime_type), (magic), sizeof(magic)-1, true, NULL }, | 141 { (mime_type), (magic), sizeof(magic) - 1, true, NULL } \ |
| 142 , | |
| 139 | 143 |
| 140 static const MagicNumber kMagicNumbers[] = { | 144 static const MagicNumber kMagicNumbers[] = { |
| 141 // Source: HTML 5 specification | 145 // Source: HTML 5 specification |
| 142 MAGIC_NUMBER("application/pdf", "%PDF-") | 146 MAGIC_NUMBER("application/pdf", |
| 143 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") | 147 "%PDF-") MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
| 144 MAGIC_NUMBER("image/gif", "GIF87a") | 148 MAGIC_NUMBER("image/gif", "GIF87a") MAGIC_NUMBER("image/gif", "GIF89a") |
| 145 MAGIC_NUMBER("image/gif", "GIF89a") | 149 MAGIC_NUMBER("image/png", |
| 146 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") | 150 "\x89" |
| 147 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") | 151 "PNG\x0D\x0A\x1A\x0A") |
| 148 MAGIC_NUMBER("image/bmp", "BM") | 152 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") |
| 149 // Source: Mozilla | 153 MAGIC_NUMBER("image/bmp", "BM") |
| 150 MAGIC_NUMBER("text/plain", "#!") // Script | 154 // Source: Mozilla |
| 151 MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS | 155 MAGIC_NUMBER("text/plain", "#!") // Script |
| 152 MAGIC_NUMBER("text/plain", "From") | 156 MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS |
| 153 MAGIC_NUMBER("text/plain", ">From") | 157 MAGIC_NUMBER("text/plain", "From") MAGIC_NUMBER("text/plain", ">From") |
| 154 // Chrome specific | 158 // Chrome specific |
| 155 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") | 159 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08") |
| 156 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") | 160 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") MAGIC_NUMBER( |
| 157 MAGIC_NUMBER("video/x-ms-asf", | 161 "video/x-ms-asf", |
| 158 "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") | 162 "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C") |
| 159 MAGIC_NUMBER("image/tiff", "I I") | 163 MAGIC_NUMBER("image/tiff", "I I") MAGIC_NUMBER( |
| 160 MAGIC_NUMBER("image/tiff", "II*") | 164 "image/tiff", |
| 161 MAGIC_NUMBER("image/tiff", "MM\x00*") | 165 "II*") MAGIC_NUMBER("image/tiff", |
| 162 MAGIC_NUMBER("audio/mpeg", "ID3") | 166 "MM\x00*") MAGIC_NUMBER("audio/mpeg", "ID3") |
| 163 MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") | 167 MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") MAGIC_NUMBER( |
| 164 MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3") | 168 "video/webm", |
| 165 // TODO(abarth): we don't handle partial byte matches yet | 169 "\x1A\x45\xDF\xA3") |
| 166 // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") | 170 // TODO(abarth): we don't handle partial byte matches yet |
| 167 // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") | 171 // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB") |
| 168 // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") | 172 // MAGIC_NUMBER("audio/mpeg", "\xFF\xE") |
| 169 MAGIC_NUMBER("application/zip", "PK\x03\x04") | 173 // MAGIC_NUMBER("audio/mpeg", "\xFF\xF") |
| 170 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") | 174 MAGIC_NUMBER("application/zip", "PK\x03\x04") |
| 171 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") | 175 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00") |
| 172 MAGIC_NUMBER("application/octet-stream", "MZ") // EXE | 176 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A") |
| 173 // Sniffing for Flash: | 177 MAGIC_NUMBER("application/octet-stream", "MZ") // EXE |
| 174 // | 178 // Sniffing for Flash: |
| 175 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") | 179 // |
| 176 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") | 180 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
| 177 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") | 181 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV") |
| 178 // | 182 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS") |
| 179 // Including these magic numbers for Flash is a trade-off. | 183 // |
| 180 // | 184 // Including these magic numbers for Flash is a trade-off. |
| 181 // Pros: | 185 // |
| 182 // * Flash is an important and popular file format | 186 // Pros: |
| 183 // | 187 // * Flash is an important and popular file format |
| 184 // Cons: | 188 // |
| 185 // * These patterns are fairly weak | 189 // Cons: |
| 186 // * If we mistakenly decide something is Flash, we will execute it | 190 // * These patterns are fairly weak |
| 187 // in the origin of an unsuspecting site. This could be a security | 191 // * If we mistakenly decide something is Flash, we will execute it |
| 188 // vulnerability if the site allows users to upload content. | 192 // in the origin of an unsuspecting site. This could be a security |
| 189 // | 193 // vulnerability if the site allows users to upload content. |
| 190 // On balance, we do not include these patterns. | 194 // |
| 195 // On balance, we do not include these patterns. | |
| 191 }; | 196 }; |
| 192 | 197 |
| 193 // The number of content bytes we need to use all our Microsoft Office magic | 198 // The number of content bytes we need to use all our Microsoft Office magic |
| 194 // numbers. | 199 // numbers. |
| 195 static const size_t kBytesRequiredForOfficeMagic = 8; | 200 static const size_t kBytesRequiredForOfficeMagic = 8; |
| 196 | 201 |
| 197 static const MagicNumber kOfficeMagicNumbers[] = { | 202 static const MagicNumber kOfficeMagicNumbers[] = { |
| 198 MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1") | 203 MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1") |
| 199 MAGIC_NUMBER("OOXML", "PK\x03\x04") | 204 MAGIC_NUMBER("OOXML", "PK\x03\x04")}; |
| 200 }; | |
| 201 | 205 |
| 202 enum OfficeDocType { | 206 enum OfficeDocType { |
| 203 DOC_TYPE_WORD, | 207 DOC_TYPE_WORD, |
| 204 DOC_TYPE_EXCEL, | 208 DOC_TYPE_EXCEL, |
| 205 DOC_TYPE_POWERPOINT, | 209 DOC_TYPE_POWERPOINT, |
| 206 DOC_TYPE_NONE | 210 DOC_TYPE_NONE |
| 207 }; | 211 }; |
| 208 | 212 |
| 209 struct OfficeExtensionType { | 213 struct OfficeExtensionType { |
| 210 OfficeDocType doc_type; | 214 OfficeDocType doc_type; |
| 211 const char* extension; | 215 const char* extension; |
| 212 size_t extension_len; | 216 size_t extension_len; |
| 213 }; | 217 }; |
| 214 | 218 |
| 215 #define OFFICE_EXTENSION(type, extension) \ | 219 #define OFFICE_EXTENSION(type, extension) \ |
| 216 { (type), (extension), sizeof(extension) - 1 }, | 220 { (type), (extension), sizeof(extension) - 1 } \ |
| 221 , | |
| 217 | 222 |
| 218 static const OfficeExtensionType kOfficeExtensionTypes[] = { | 223 static const OfficeExtensionType kOfficeExtensionTypes[] = { |
| 219 OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc") | 224 OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc") |
| 220 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls") | 225 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls") |
| 221 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt") | 226 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt") |
| 222 OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx") | 227 OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx") |
| 223 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx") | 228 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx") |
| 224 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx") | 229 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")}; |
| 225 }; | |
| 226 | 230 |
| 227 static const MagicNumber kExtraMagicNumbers[] = { | 231 static const MagicNumber kExtraMagicNumbers[] = { |
| 228 MAGIC_NUMBER("image/x-xbitmap", "#define") | 232 MAGIC_NUMBER("image/x-xbitmap", "#define") MAGIC_NUMBER( |
| 229 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00") | 233 "image/x-icon", |
| 230 MAGIC_NUMBER("image/svg+xml", "<?xml_version=") | 234 "\x00\x00\x01\x00") MAGIC_NUMBER("image/svg+xml", "<?xml_version=") |
| 231 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") | 235 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") MAGIC_NUMBER( |
| 232 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST") | 236 "video/avi", |
| 233 MAGIC_NUMBER("audio/ogg", "OggS") | 237 "RIFF....AVI LIST") MAGIC_NUMBER("audio/ogg", "OggS") |
| 234 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") | 238 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") |
| 235 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") | 239 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") MAGIC_NUMBER( |
| 236 MAGIC_NUMBER("video/3gpp", "....ftyp3g") | 240 "video/3gpp", |
| 237 MAGIC_NUMBER("video/3gpp", "....ftypavcl") | 241 "....ftyp3g") MAGIC_NUMBER("video/3gpp", "....ftypavcl") |
| 238 MAGIC_NUMBER("video/mp4", "....ftyp") | 242 MAGIC_NUMBER("video/mp4", "....ftyp") |
| 239 MAGIC_NUMBER("video/quicktime", "....moov") | 243 MAGIC_NUMBER("video/quicktime", "....moov") |
| 240 MAGIC_NUMBER("application/x-shockwave-flash", "CWS") | 244 MAGIC_NUMBER("application/x-shockwave-flash", "CWS") |
| 241 MAGIC_NUMBER("application/x-shockwave-flash", "FWS") | 245 MAGIC_NUMBER("application/x-shockwave-flash", |
| 242 MAGIC_NUMBER("video/x-flv", "FLV") | 246 "FWS") |
| 243 MAGIC_NUMBER("audio/x-flac", "fLaC") | 247 MAGIC_NUMBER("video/x-flv", "FLV") |
| 248 MAGIC_NUMBER("audio/x-flac", "fLaC") | |
| 244 | 249 |
| 245 // RAW image types. | 250 // RAW image types. |
| 246 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR") | 251 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR") |
| 247 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR") | 252 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR") |
| 248 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM") | 253 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM") |
| 249 MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian | 254 MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian |
| 250 MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian | 255 MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian |
| 251 MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian | 256 MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian |
| 252 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ") | 257 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ") |
| 253 MAGIC_NUMBER("image/x-panasonic-raw", | 258 MAGIC_NUMBER("image/x-panasonic-raw", |
| 254 "IIU\x00\x08\x00\x00\x00") // Panasonic .raw | 259 "IIU\x00\x08\x00\x00\x00") // Panasonic .raw |
| 255 MAGIC_NUMBER("image/x-panasonic-raw", | 260 MAGIC_NUMBER("image/x-panasonic-raw", |
| 256 "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2 | 261 "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2 |
| 257 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw") | 262 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw") |
| 258 MAGIC_NUMBER("image/x-x3f", "FOVb") | 263 MAGIC_NUMBER("image/x-x3f", "FOVb")}; |
| 259 }; | |
| 260 | 264 |
| 261 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will | 265 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will |
| 262 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is | 266 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is |
| 263 // HTML, but we will not. | 267 // HTML, but we will not. |
| 264 | 268 |
| 265 #define MAGIC_HTML_TAG(tag) \ | 269 #define MAGIC_HTML_TAG(tag) MAGIC_STRING("text/html", "<" tag) |
| 266 MAGIC_STRING("text/html", "<" tag) | |
| 267 | 270 |
| 268 static const MagicNumber kSniffableTags[] = { | 271 static const MagicNumber kSniffableTags[] = { |
| 269 // XML processing directive. Although this is not an HTML mime type, we sniff | 272 // XML processing directive. Although this is not an HTML mime type, we |
| 270 // for this in the HTML phase because text/xml is just as powerful as HTML and | 273 // sniff |
| 271 // we want to leverage our white space skipping technology. | 274 // for this in the HTML phase because text/xml is just as powerful as HTML |
| 272 MAGIC_NUMBER("text/xml", "<?xml") // Mozilla | 275 // and |
| 273 // DOCTYPEs | 276 // we want to leverage our white space skipping technology. |
| 274 MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec | 277 MAGIC_NUMBER("text/xml", "<?xml") // Mozilla |
| 275 // Sniffable tags, ordered by how often they occur in sniffable documents. | 278 // DOCTYPEs |
| 276 MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla | 279 MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec |
| 277 MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla | 280 // Sniffable tags, ordered by how often they occur in sniffable documents. |
| 278 MAGIC_HTML_TAG("!--") | 281 MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla |
| 279 MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla | 282 MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla |
| 280 MAGIC_HTML_TAG("iframe") // Mozilla | 283 MAGIC_HTML_TAG("!--") MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla |
| 281 MAGIC_HTML_TAG("h1") // Mozilla | 284 MAGIC_HTML_TAG("iframe") // Mozilla |
| 282 MAGIC_HTML_TAG("div") // Mozilla | 285 MAGIC_HTML_TAG("h1") // Mozilla |
| 283 MAGIC_HTML_TAG("font") // Mozilla | 286 MAGIC_HTML_TAG("div") // Mozilla |
| 284 MAGIC_HTML_TAG("table") // Mozilla | 287 MAGIC_HTML_TAG("font") // Mozilla |
| 285 MAGIC_HTML_TAG("a") // Mozilla | 288 MAGIC_HTML_TAG("table") // Mozilla |
| 286 MAGIC_HTML_TAG("style") // Mozilla | 289 MAGIC_HTML_TAG("a") // Mozilla |
| 287 MAGIC_HTML_TAG("title") // Mozilla | 290 MAGIC_HTML_TAG("style") // Mozilla |
| 288 MAGIC_HTML_TAG("b") // Mozilla | 291 MAGIC_HTML_TAG("title") // Mozilla |
| 289 MAGIC_HTML_TAG("body") // Mozilla | 292 MAGIC_HTML_TAG("b") // Mozilla |
| 290 MAGIC_HTML_TAG("br") | 293 MAGIC_HTML_TAG("body") // Mozilla |
| 291 MAGIC_HTML_TAG("p") // Mozilla | 294 MAGIC_HTML_TAG("br") MAGIC_HTML_TAG("p") // Mozilla |
| 292 }; | 295 }; |
| 293 | 296 |
| 294 static base::HistogramBase* UMASnifferHistogramGet(const char* name, | 297 static base::HistogramBase* UMASnifferHistogramGet(const char* name, |
| 295 int array_size) { | 298 int array_size) { |
| 296 base::HistogramBase* counter = | 299 base::HistogramBase* counter = base::LinearHistogram::FactoryGet( |
| 297 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size, | 300 name, |
| 298 base::HistogramBase::kUmaTargetedHistogramFlag); | 301 1, |
| 302 array_size - 1, | |
| 303 array_size, | |
| 304 base::HistogramBase::kUmaTargetedHistogramFlag); | |
| 299 return counter; | 305 return counter; |
| 300 } | 306 } |
| 301 | 307 |
| 302 // Compares the content header to a magic number. A '.' in magic_entry is a | 308 // Compares the content header to a magic number. A '.' in magic_entry is a |
| 303 // wildcard that matches any single byte, allowing some bytes to be skipped. | 309 // wildcard that matches any single byte, allowing some bytes to be skipped. |
| 304 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) { | 310 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) { |
| 305 while (len) { | 311 while (len) { |
| 306 if ((*magic_entry != '.') && (*magic_entry != *content)) | 312 if ((*magic_entry != '.') && (*magic_entry != *content)) |
| 307 return false; | 313 return false; |
| 308 ++magic_entry; | 314 ++magic_entry; |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 361 } | 367 } |
| 362 } | 368 } |
| 363 | 369 |
| 364 if (match) { | 370 if (match) { |
| 365 result->assign(magic_entry.mime_type); | 371 result->assign(magic_entry.mime_type); |
| 366 return true; | 372 return true; |
| 367 } | 373 } |
| 368 return false; | 374 return false; |
| 369 } | 375 } |
| 370 | 376 |
| 371 static bool CheckForMagicNumbers(const char* content, size_t size, | 377 static bool CheckForMagicNumbers(const char* content, |
| 372 const MagicNumber* magic, size_t magic_len, | 378 size_t size, |
| 379 const MagicNumber* magic, | |
| 380 size_t magic_len, | |
| 373 base::HistogramBase* counter, | 381 base::HistogramBase* counter, |
| 374 std::string* result) { | 382 std::string* result) { |
| 375 for (size_t i = 0; i < magic_len; ++i) { | 383 for (size_t i = 0; i < magic_len; ++i) { |
| 376 if (MatchMagicNumber(content, size, magic[i], result)) { | 384 if (MatchMagicNumber(content, size, magic[i], result)) { |
| 377 if (counter) counter->Add(static_cast<int>(i)); | 385 if (counter) |
| 386 counter->Add(static_cast<int>(i)); | |
| 378 return true; | 387 return true; |
| 379 } | 388 } |
| 380 } | 389 } |
| 381 return false; | 390 return false; |
| 382 } | 391 } |
| 383 | 392 |
| 384 // Truncates |size| to |max_size| and returns true if |size| is at least | 393 // Truncates |size| to |max_size| and returns true if |size| is at least |
| 385 // |max_size|. | 394 // |max_size|. |
| 386 static bool TruncateSize(const size_t max_size, size_t* size) { | 395 static bool TruncateSize(const size_t max_size, size_t* size) { |
| 387 // Keep kMaxBytesToSniff honest. | 396 // Keep kMaxBytesToSniff honest. |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 411 for (pos = content; pos < end; ++pos) { | 420 for (pos = content; pos < end; ++pos) { |
| 412 if (!IsAsciiWhitespace(*pos)) | 421 if (!IsAsciiWhitespace(*pos)) |
| 413 break; | 422 break; |
| 414 } | 423 } |
| 415 static base::HistogramBase* counter(NULL); | 424 static base::HistogramBase* counter(NULL); |
| 416 if (!counter) { | 425 if (!counter) { |
| 417 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", | 426 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", |
| 418 arraysize(kSniffableTags)); | 427 arraysize(kSniffableTags)); |
| 419 } | 428 } |
| 420 // |pos| now points to first non-whitespace character (or at end). | 429 // |pos| now points to first non-whitespace character (or at end). |
| 421 return CheckForMagicNumbers(pos, end - pos, | 430 return CheckForMagicNumbers(pos, |
| 422 kSniffableTags, arraysize(kSniffableTags), | 431 end - pos, |
| 423 counter, result); | 432 kSniffableTags, |
| 433 arraysize(kSniffableTags), | |
| 434 counter, | |
| 435 result); | |
| 424 } | 436 } |
| 425 | 437 |
| 426 // Returns true and sets result if the content matches any of kMagicNumbers. | 438 // Returns true and sets result if the content matches any of kMagicNumbers. |
| 427 // Clears have_enough_content if more data could possibly change the result. | 439 // Clears have_enough_content if more data could possibly change the result. |
| 428 static bool SniffForMagicNumbers(const char* content, | 440 static bool SniffForMagicNumbers(const char* content, |
| 429 size_t size, | 441 size_t size, |
| 430 bool* have_enough_content, | 442 bool* have_enough_content, |
| 431 std::string* result) { | 443 std::string* result) { |
| 432 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); | 444 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); |
| 433 | 445 |
| 434 // Check our big table of Magic Numbers | 446 // Check our big table of Magic Numbers |
| 435 static base::HistogramBase* counter(NULL); | 447 static base::HistogramBase* counter(NULL); |
| 436 if (!counter) { | 448 if (!counter) { |
| 437 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", | 449 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", |
| 438 arraysize(kMagicNumbers)); | 450 arraysize(kMagicNumbers)); |
| 439 } | 451 } |
| 440 return CheckForMagicNumbers(content, size, | 452 return CheckForMagicNumbers( |
| 441 kMagicNumbers, arraysize(kMagicNumbers), | 453 content, size, kMagicNumbers, arraysize(kMagicNumbers), counter, result); |
| 442 counter, result); | |
| 443 } | 454 } |
| 444 | 455 |
| 445 // Returns true and sets result if the content matches any of | 456 // Returns true and sets result if the content matches any of |
| 446 // kOfficeMagicNumbers, and the URL has the proper extension. | 457 // kOfficeMagicNumbers, and the URL has the proper extension. |
| 447 // Clears |have_enough_content| if more data could possibly change the result. | 458 // Clears |have_enough_content| if more data could possibly change the result. |
| 448 static bool SniffForOfficeDocs(const char* content, | 459 static bool SniffForOfficeDocs(const char* content, |
| 449 size_t size, | 460 size_t size, |
| 450 const GURL& url, | 461 const GURL& url, |
| 451 bool* have_enough_content, | 462 bool* have_enough_content, |
| 452 std::string* result) { | 463 std::string* result) { |
| 453 *have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size); | 464 *have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size); |
| 454 | 465 |
| 455 // Check our table of magic numbers for Office file types. | 466 // Check our table of magic numbers for Office file types. |
| 456 std::string office_version; | 467 std::string office_version; |
| 457 if (!CheckForMagicNumbers(content, size, | 468 if (!CheckForMagicNumbers(content, |
| 458 kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers), | 469 size, |
| 459 NULL, &office_version)) | 470 kOfficeMagicNumbers, |
| 471 arraysize(kOfficeMagicNumbers), | |
| 472 NULL, | |
| 473 &office_version)) | |
| 460 return false; | 474 return false; |
| 461 | 475 |
| 462 OfficeDocType type = DOC_TYPE_NONE; | 476 OfficeDocType type = DOC_TYPE_NONE; |
| 463 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) { | 477 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) { |
| 464 std::string url_path = url.path(); | 478 std::string url_path = url.path(); |
| 465 | 479 |
| 466 if (url_path.length() < kOfficeExtensionTypes[i].extension_len) | 480 if (url_path.length() < kOfficeExtensionTypes[i].extension_len) |
| 467 continue; | 481 continue; |
| 468 | 482 |
| 469 const char* extension = | 483 const char* extension = |
| 470 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; | 484 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; |
| 471 | 485 |
| 472 if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension, | 486 if (0 == base::strncasecmp(extension, |
| 487 kOfficeExtensionTypes[i].extension, | |
| 473 kOfficeExtensionTypes[i].extension_len)) { | 488 kOfficeExtensionTypes[i].extension_len)) { |
| 474 type = kOfficeExtensionTypes[i].doc_type; | 489 type = kOfficeExtensionTypes[i].doc_type; |
| 475 break; | 490 break; |
| 476 } | 491 } |
| 477 } | 492 } |
| 478 | 493 |
| 479 if (type == DOC_TYPE_NONE) | 494 if (type == DOC_TYPE_NONE) |
| 480 return false; | 495 return false; |
| 481 | 496 |
| 482 if (office_version == "CFB") { | 497 if (office_version == "CFB") { |
| 483 switch (type) { | 498 switch (type) { |
| 484 case DOC_TYPE_WORD: | 499 case DOC_TYPE_WORD: |
| 485 *result = "application/msword"; | 500 *result = "application/msword"; |
| 486 return true; | 501 return true; |
| 487 case DOC_TYPE_EXCEL: | 502 case DOC_TYPE_EXCEL: |
| 488 *result = "application/vnd.ms-excel"; | 503 *result = "application/vnd.ms-excel"; |
| 489 return true; | 504 return true; |
| 490 case DOC_TYPE_POWERPOINT: | 505 case DOC_TYPE_POWERPOINT: |
| 491 *result = "application/vnd.ms-powerpoint"; | 506 *result = "application/vnd.ms-powerpoint"; |
| 492 return true; | 507 return true; |
| 493 case DOC_TYPE_NONE: | 508 case DOC_TYPE_NONE: |
| 494 NOTREACHED(); | 509 NOTREACHED(); |
| 495 return false; | 510 return false; |
| 496 } | 511 } |
| 497 } else if (office_version == "OOXML") { | 512 } else if (office_version == "OOXML") { |
| 498 switch (type) { | 513 switch (type) { |
| 499 case DOC_TYPE_WORD: | 514 case DOC_TYPE_WORD: |
| 500 *result = "application/vnd.openxmlformats-officedocument." | 515 *result = |
| 501 "wordprocessingml.document"; | 516 "application/vnd.openxmlformats-officedocument." |
| 517 "wordprocessingml.document"; | |
| 502 return true; | 518 return true; |
| 503 case DOC_TYPE_EXCEL: | 519 case DOC_TYPE_EXCEL: |
| 504 *result = "application/vnd.openxmlformats-officedocument." | 520 *result = |
| 505 "spreadsheetml.sheet"; | 521 "application/vnd.openxmlformats-officedocument." |
| 522 "spreadsheetml.sheet"; | |
| 506 return true; | 523 return true; |
| 507 case DOC_TYPE_POWERPOINT: | 524 case DOC_TYPE_POWERPOINT: |
| 508 *result = "application/vnd.openxmlformats-officedocument." | 525 *result = |
| 509 "presentationml.presentation"; | 526 "application/vnd.openxmlformats-officedocument." |
| 527 "presentationml.presentation"; | |
| 510 return true; | 528 return true; |
| 511 case DOC_TYPE_NONE: | 529 case DOC_TYPE_NONE: |
| 512 NOTREACHED(); | 530 NOTREACHED(); |
| 513 return false; | 531 return false; |
| 514 } | 532 } |
| 515 } | 533 } |
| 516 | 534 |
| 517 NOTREACHED(); | 535 NOTREACHED(); |
| 518 return false; | 536 return false; |
| 519 } | 537 } |
| 520 | 538 |
| 521 static bool IsOfficeType(const std::string& type_hint) { | 539 static bool IsOfficeType(const std::string& type_hint) { |
| 522 return (type_hint == "application/msword" || | 540 return (type_hint == "application/msword" || |
| 523 type_hint == "application/vnd.ms-excel" || | 541 type_hint == "application/vnd.ms-excel" || |
| 524 type_hint == "application/vnd.ms-powerpoint" || | 542 type_hint == "application/vnd.ms-powerpoint" || |
| 525 type_hint == "application/vnd.openxmlformats-officedocument." | 543 type_hint == |
| 526 "wordprocessingml.document" || | 544 "application/vnd.openxmlformats-officedocument." |
| 527 type_hint == "application/vnd.openxmlformats-officedocument." | 545 "wordprocessingml.document" || |
| 528 "spreadsheetml.sheet" || | 546 type_hint == |
| 529 type_hint == "application/vnd.openxmlformats-officedocument." | 547 "application/vnd.openxmlformats-officedocument." |
| 530 "presentationml.presentation" || | 548 "spreadsheetml.sheet" || |
| 549 type_hint == | |
| 550 "application/vnd.openxmlformats-officedocument." | |
| 551 "presentationml.presentation" || | |
| 531 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" || | 552 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" || |
| 532 type_hint == "application/vnd.ms-word.document.macroenabled.12" || | 553 type_hint == "application/vnd.ms-word.document.macroenabled.12" || |
| 533 type_hint == "application/vnd.ms-powerpoint.presentation." | 554 type_hint == |
| 534 "macroenabled.12" || | 555 "application/vnd.ms-powerpoint.presentation." |
| 556 "macroenabled.12" || | |
| 535 type_hint == "application/mspowerpoint" || | 557 type_hint == "application/mspowerpoint" || |
| 536 type_hint == "application/msexcel" || | 558 type_hint == "application/msexcel" || |
| 537 type_hint == "application/vnd.ms-word" || | 559 type_hint == "application/vnd.ms-word" || |
| 538 type_hint == "application/vnd.ms-word.document.12" || | 560 type_hint == "application/vnd.ms-word.document.12" || |
| 539 type_hint == "application/vnd.msword"); | 561 type_hint == "application/vnd.msword"); |
| 540 } | 562 } |
| 541 | 563 |
| 542 // This function checks for files that have a Microsoft Office MIME type | 564 // This function checks for files that have a Microsoft Office MIME type |
| 543 // set, but are not actually Office files. | 565 // set, but are not actually Office files. |
| 544 // | 566 // |
| 545 // If this is not actually an Office file, |*result| is set to | 567 // If this is not actually an Office file, |*result| is set to |
| 546 // "application/octet-stream", otherwise it is not modified. | 568 // "application/octet-stream", otherwise it is not modified. |
| 547 // | 569 // |
| 548 // Returns false if additional data is required to determine the file type, or | 570 // Returns false if additional data is required to determine the file type, or |
| 549 // true if there is enough data to make a decision. | 571 // true if there is enough data to make a decision. |
| 550 static bool SniffForInvalidOfficeDocs(const char* content, | 572 static bool SniffForInvalidOfficeDocs(const char* content, |
| 551 size_t size, | 573 size_t size, |
| 552 const GURL& url, | 574 const GURL& url, |
| 553 std::string* result) { | 575 std::string* result) { |
| 554 if (!TruncateSize(kBytesRequiredForOfficeMagic, &size)) | 576 if (!TruncateSize(kBytesRequiredForOfficeMagic, &size)) |
| 555 return false; | 577 return false; |
| 556 | 578 |
| 557 // Check our table of magic numbers for Office file types. If it does not | 579 // Check our table of magic numbers for Office file types. If it does not |
| 558 // match one, the MIME type was invalid. Set it instead to a safe value. | 580 // match one, the MIME type was invalid. Set it instead to a safe value. |
| 559 std::string office_version; | 581 std::string office_version; |
| 560 if (!CheckForMagicNumbers(content, size, | 582 if (!CheckForMagicNumbers(content, |
| 561 kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers), | 583 size, |
| 562 NULL, &office_version)) { | 584 kOfficeMagicNumbers, |
| 585 arraysize(kOfficeMagicNumbers), | |
| 586 NULL, | |
| 587 &office_version)) { | |
| 563 *result = "application/octet-stream"; | 588 *result = "application/octet-stream"; |
| 564 } | 589 } |
| 565 | 590 |
| 566 // We have enough information to determine if this was a Microsoft Office | 591 // We have enough information to determine if this was a Microsoft Office |
| 567 // document or not, so sniffing is completed. | 592 // document or not, so sniffing is completed. |
| 568 return true; | 593 return true; |
| 569 } | 594 } |
| 570 | 595 |
| 571 // Magic strings for XML-based document types | 596 // Magic strings for XML-based document types |
| 572 static const MagicNumber kMagicXML[] = { | 597 static const MagicNumber kMagicXML[] = { |
| 573 // We want to be very conservative in interpreting text/xml content as | 598 // We want to be very conservative in interpreting text/xml content as |
| 574 // XHTML -- we just want to sniff enough to make unit tests pass. | 599 // XHTML -- we just want to sniff enough to make unit tests pass. |
| 575 // So we match explicitly on this, and don't match other ways of writing | 600 // So we match explicitly on this, and don't match other ways of writing |
| 576 // it in semantically-equivalent ways. | 601 // it in semantically-equivalent ways. |
| 577 MAGIC_STRING("application/xhtml+xml", | 602 MAGIC_STRING("application/xhtml+xml", |
| 578 "<html xmlns=\"http://www.w3.org/1999/xhtml\"") | 603 "<html xmlns=\"http://www.w3.org/1999/xhtml\"") |
| 579 MAGIC_STRING("application/atom+xml", "<feed") | 604 MAGIC_STRING("application/atom+xml", "<feed") |
| 580 MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 | 605 MAGIC_STRING("application/rss+xml", "<rss") // UTF-8 |
| 581 }; | 606 }; |
| 582 | 607 |
| 583 // Returns true and sets result if the content appears to contain XHTML or a | 608 // Returns true and sets result if the content appears to contain XHTML or a |
| 584 // feed. | 609 // feed. |
| 585 // Clears have_enough_content if more data could possibly change the result. | 610 // Clears have_enough_content if more data could possibly change the result. |
| 586 // | 611 // |
| 587 // TODO(evanm): this is similar but more conservative than what Safari does, | 612 // TODO(evanm): this is similar but more conservative than what Safari does, |
| 588 // while HTML5 has a different recommendation -- what should we do? | 613 // while HTML5 has a different recommendation -- what should we do? |
| 589 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset | 614 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset |
| 590 // of ASCII -- do we care? | 615 // of ASCII -- do we care? |
| 591 static bool SniffXML(const char* content, | 616 static bool SniffXML(const char* content, |
| 592 size_t size, | 617 size_t size, |
| 593 bool* have_enough_content, | 618 bool* have_enough_content, |
| 594 std::string* result) { | 619 std::string* result) { |
| 595 // We allow at most 300 bytes of content before we expect the opening tag. | 620 // We allow at most 300 bytes of content before we expect the opening tag. |
| 596 *have_enough_content &= TruncateSize(300, &size); | 621 *have_enough_content &= TruncateSize(300, &size); |
| 597 const char* pos = content; | 622 const char* pos = content; |
| 598 const char* const end = content + size; | 623 const char* const end = content + size; |
| 599 | 624 |
| 600 // This loop iterates through tag-looking offsets in the file. | 625 // This loop iterates through tag-looking offsets in the file. |
| 601 // We want to skip XML processing instructions (of the form "<?xml ...") | 626 // We want to skip XML processing instructions (of the form "<?xml ...") |
| 602 // and stop at the first "plain" tag, then make a decision on the mime-type | 627 // and stop at the first "plain" tag, then make a decision on the mime-type |
| 603 // based on the name (or possibly attributes) of that tag. | 628 // based on the name (or possibly attributes) of that tag. |
| 604 static base::HistogramBase* counter(NULL); | 629 static base::HistogramBase* counter(NULL); |
| 605 if (!counter) { | 630 if (!counter) { |
| 606 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", | 631 counter = |
| 607 arraysize(kMagicXML)); | 632 UMASnifferHistogramGet("mime_sniffer.kMagicXML2", arraysize(kMagicXML)); |
| 608 } | 633 } |
| 609 const int kMaxTagIterations = 5; | 634 const int kMaxTagIterations = 5; |
| 610 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { | 635 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
| 611 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); | 636 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); |
| 612 if (!pos) | 637 if (!pos) |
| 613 return false; | 638 return false; |
| 614 | 639 |
| 615 if ((pos + sizeof("<?xml") - 1 <= end) && | 640 if ((pos + sizeof("<?xml") - 1 <= end) && |
| 616 (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) { | 641 (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) { |
| 617 // Skip XML declarations. | 642 // Skip XML declarations. |
| 618 ++pos; | 643 ++pos; |
| 619 continue; | 644 continue; |
| 620 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) && | 645 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) && |
| 621 (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == | 646 (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == |
| 622 0)) { | 647 0)) { |
| 623 // Skip DOCTYPE declarations. | 648 // Skip DOCTYPE declarations. |
| 624 ++pos; | 649 ++pos; |
| 625 continue; | 650 continue; |
| 626 } | 651 } |
| 627 | 652 |
| 628 if (CheckForMagicNumbers(pos, end - pos, | 653 if (CheckForMagicNumbers( |
| 629 kMagicXML, arraysize(kMagicXML), | 654 pos, end - pos, kMagicXML, arraysize(kMagicXML), counter, result)) |
| 630 counter, result)) | |
| 631 return true; | 655 return true; |
| 632 | 656 |
| 633 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult | 657 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult |
| 634 // to identify. | 658 // to identify. |
| 635 | 659 |
| 636 // If we get here, we've hit an initial tag that hasn't matched one of the | 660 // If we get here, we've hit an initial tag that hasn't matched one of the |
| 637 // above tests. Abort. | 661 // above tests. Abort. |
| 638 return true; | 662 return true; |
| 639 } | 663 } |
| 640 | 664 |
| 641 // We iterated too far without finding a start tag. | 665 // We iterated too far without finding a start tag. |
| 642 // If we have more content to look at, we aren't going to change our mind by | 666 // If we have more content to look at, we aren't going to change our mind by |
| 643 // seeing more bytes from the network. | 667 // seeing more bytes from the network. |
| 644 return pos < end; | 668 return pos < end; |
| 645 } | 669 } |
| 646 | 670 |
| 647 // Byte order marks | 671 // Byte order marks |
| 648 static const MagicNumber kByteOrderMark[] = { | 672 static const MagicNumber kByteOrderMark[] = { |
| 649 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE | 673 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE |
| 650 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE | 674 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE |
| 651 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 | 675 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8 |
| 652 }; | 676 }; |
| 653 | 677 |
| 654 // Whether a given byte looks like it might be part of binary content. | 678 // Whether a given byte looks like it might be part of binary content. |
| 655 // Source: HTML5 spec | 679 // Source: HTML5 spec |
| 656 static char kByteLooksBinary[] = { | 680 static char kByteLooksBinary[] = { |
| 657 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F | 681 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F |
|
davidben
2014/10/10 20:24:16
Is this a clang-format bug? google-c-style.el says
| |
| 658 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F | 682 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F |
| 659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F | 683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F |
| 660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F | 684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F |
| 661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F | 685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F |
| 662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F | 686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F |
| 663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F | 687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F |
| 664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F | 688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F |
| 665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F | 689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F |
| 666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F | 690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F |
| 667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF | 691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
| 668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF | 692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
| 669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF | 693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
| 670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF | 694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
| 671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF | 695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
| 672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF | 696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
| 673 }; | 697 }; |
| 674 | 698 |
| 675 // Returns true and sets result to "application/octet-stream" if the content | 699 // Returns true and sets result to "application/octet-stream" if the content |
| 676 // appears to be binary data. Otherwise, returns false and sets "text/plain". | 700 // appears to be binary data. Otherwise, returns false and sets "text/plain". |
| 677 // Clears have_enough_content if more data could possibly change the result. | 701 // Clears have_enough_content if more data could possibly change the result. |
| 678 static bool SniffBinary(const char* content, | 702 static bool SniffBinary(const char* content, |
| 679 size_t size, | 703 size_t size, |
| 680 bool* have_enough_content, | 704 bool* have_enough_content, |
| 681 std::string* result) { | 705 std::string* result) { |
| 682 // There is no consensus about exactly how to sniff for binary content. | 706 // There is no consensus about exactly how to sniff for binary content. |
| 683 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 707 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |
| 684 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. | 708 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. |
| 685 // Here, we side with FF, but with a smaller buffer. This size was chosen | 709 // Here, we side with FF, but with a smaller buffer. This size was chosen |
| 686 // because it is small enough to comfortably fit into a single packet (after | 710 // because it is small enough to comfortably fit into a single packet (after |
| 687 // allowing for headers) and yet large enough to account for binary formats | 711 // allowing for headers) and yet large enough to account for binary formats |
| 688 // that have a significant amount of ASCII at the beginning (crbug.com/15314). | 712 // that have a significant amount of ASCII at the beginning (crbug.com/15314). |
| 689 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); | 713 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); |
| 690 | 714 |
| 691 // First, we look for a BOM. | 715 // First, we look for a BOM. |
| 692 static base::HistogramBase* counter(NULL); | 716 static base::HistogramBase* counter(NULL); |
| 693 if (!counter) { | 717 if (!counter) { |
| 694 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", | 718 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", |
| 695 arraysize(kByteOrderMark)); | 719 arraysize(kByteOrderMark)); |
| 696 } | 720 } |
| 697 std::string unused; | 721 std::string unused; |
| 698 if (CheckForMagicNumbers(content, size, | 722 if (CheckForMagicNumbers(content, |
| 699 kByteOrderMark, arraysize(kByteOrderMark), | 723 size, |
| 700 counter, &unused)) { | 724 kByteOrderMark, |
| 725 arraysize(kByteOrderMark), | |
| 726 counter, | |
| 727 &unused)) { | |
| 701 // If there is BOM, we think the buffer is not binary. | 728 // If there is BOM, we think the buffer is not binary. |
| 702 result->assign("text/plain"); | 729 result->assign("text/plain"); |
| 703 return false; | 730 return false; |
| 704 } | 731 } |
| 705 | 732 |
| 706 // Next we look to see if any of the bytes "look binary." | 733 // Next we look to see if any of the bytes "look binary." |
| 707 for (size_t i = 0; i < size; ++i) { | 734 for (size_t i = 0; i < size; ++i) { |
| 708 // If we see a binary-looking byte, we think the content is binary. | 735 // If we see a binary-looking byte, we think the content is binary. |
| 709 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { | 736 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) { |
| 710 result->assign("application/octet-stream"); | 737 result->assign("application/octet-stream"); |
| 711 return true; | 738 return true; |
| 712 } | 739 } |
| 713 } | 740 } |
| 714 | 741 |
| 715 // No evidence either way. Default to non-binary and, if truncated, clear | 742 // No evidence either way. Default to non-binary and, if truncated, clear |
| 716 // have_enough_content because there could be a binary looking byte in the | 743 // have_enough_content because there could be a binary looking byte in the |
| 717 // truncated data. | 744 // truncated data. |
| 718 *have_enough_content &= is_truncated; | 745 *have_enough_content &= is_truncated; |
| 719 result->assign("text/plain"); | 746 result->assign("text/plain"); |
| 720 return false; | 747 return false; |
| 721 } | 748 } |
| 722 | 749 |
| 723 static bool IsUnknownMimeType(const std::string& mime_type) { | 750 static bool IsUnknownMimeType(const std::string& mime_type) { |
| 724 // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here. | 751 // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here. |
| 725 // If we do, please be careful not to alter the semantics at all. | 752 // If we do, please be careful not to alter the semantics at all. |
| 726 static const char* kUnknownMimeTypes[] = { | 753 static const char* kUnknownMimeTypes[] = { |
| 727 // Empty mime types are as unknown as they get. | 754 // Empty mime types are as unknown as they get. |
| 728 "", | 755 "", |
| 729 // The unknown/unknown type is popular and uninformative | 756 // The unknown/unknown type is popular and uninformative |
| 730 "unknown/unknown", | 757 "unknown/unknown", |
| 731 // The second most popular unknown mime type is application/unknown | 758 // The second most popular unknown mime type is application/unknown |
| 732 "application/unknown", | 759 "application/unknown", |
| 733 // Firefox rejects a mime type if it is exactly */* | 760 // Firefox rejects a mime type if it is exactly */* |
| 734 "*/*", | 761 "*/*", |
| 735 }; | 762 }; |
| 736 static base::HistogramBase* counter(NULL); | 763 static base::HistogramBase* counter(NULL); |
| 737 if (!counter) { | 764 if (!counter) { |
| 738 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", | 765 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", |
| 739 arraysize(kUnknownMimeTypes) + 1); | 766 arraysize(kUnknownMimeTypes) + 1); |
| 740 } | 767 } |
| 741 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { | 768 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { |
| 742 if (mime_type == kUnknownMimeTypes[i]) { | 769 if (mime_type == kUnknownMimeTypes[i]) { |
| 743 counter->Add(i); | 770 counter->Add(i); |
| 744 return true; | 771 return true; |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 766 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); | 793 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); |
| 767 | 794 |
| 768 // Technically, the crx magic number is just Cr24, but the bytes after that | 795 // Technically, the crx magic number is just Cr24, but the bytes after that |
| 769 // are a version number which changes infrequently. Including it in the | 796 // are a version number which changes infrequently. Including it in the |
| 770 // sniffing gives us less room for error. If the version number ever changes, | 797 // sniffing gives us less room for error. If the version number ever changes, |
| 771 // we can just add an entry to this list. | 798 // we can just add an entry to this list. |
| 772 // | 799 // |
| 773 // TODO(aa): If we ever have another magic number, we'll want to pass a | 800 // TODO(aa): If we ever have another magic number, we'll want to pass a |
| 774 // histogram into CheckForMagicNumbers(), below, to see which one matched. | 801 // histogram into CheckForMagicNumbers(), below, to see which one matched. |
| 775 static const struct MagicNumber kCRXMagicNumbers[] = { | 802 static const struct MagicNumber kCRXMagicNumbers[] = { |
| 776 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00") | 803 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")}; |
|
davidben
2014/10/10 20:24:16
I feel like the newline between 776 and 777 should
| |
| 777 }; | |
| 778 | 804 |
| 779 // Only consider files that have the extension ".crx". | 805 // Only consider files that have the extension ".crx". |
| 780 static const char kCRXExtension[] = ".crx"; | 806 static const char kCRXExtension[] = ".crx"; |
| 781 // Ignore null by subtracting 1. | 807 // Ignore null by subtracting 1. |
| 782 static const int kExtensionLength = arraysize(kCRXExtension) - 1; | 808 static const int kExtensionLength = arraysize(kCRXExtension) - 1; |
| 783 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) == | 809 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) == |
| 784 url.path().size() - kExtensionLength) { | 810 url.path().size() - kExtensionLength) { |
| 785 counter->Add(1); | 811 counter->Add(1); |
| 786 } else { | 812 } else { |
| 787 return false; | 813 return false; |
| 788 } | 814 } |
| 789 | 815 |
| 790 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); | 816 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); |
| 791 if (CheckForMagicNumbers(content, size, | 817 if (CheckForMagicNumbers(content, |
| 792 kCRXMagicNumbers, arraysize(kCRXMagicNumbers), | 818 size, |
| 793 NULL, result)) { | 819 kCRXMagicNumbers, |
| 820 arraysize(kCRXMagicNumbers), | |
| 821 NULL, | |
| 822 result)) { | |
| 794 counter->Add(2); | 823 counter->Add(2); |
| 795 } else { | 824 } else { |
| 796 return false; | 825 return false; |
| 797 } | 826 } |
| 798 | 827 |
| 799 return true; | 828 return true; |
| 800 } | 829 } |
| 801 | 830 |
| 802 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { | 831 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |
| 803 static base::HistogramBase* should_sniff_counter(NULL); | 832 static base::HistogramBase* should_sniff_counter(NULL); |
| 804 if (!should_sniff_counter) { | 833 if (!should_sniff_counter) { |
| 805 should_sniff_counter = | 834 should_sniff_counter = |
| 806 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); | 835 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); |
| 807 } | 836 } |
| 808 bool sniffable_scheme = url.is_empty() || | 837 bool sniffable_scheme = url.is_empty() || url.SchemeIsHTTPOrHTTPS() || |
| 809 url.SchemeIsHTTPOrHTTPS() || | |
| 810 url.SchemeIs("ftp") || | 838 url.SchemeIs("ftp") || |
| 811 #if defined(OS_ANDROID) | 839 #if defined(OS_ANDROID) |
| 812 url.SchemeIs("content") || | 840 url.SchemeIs("content") || |
| 813 #endif | 841 #endif |
| 814 url.SchemeIsFile() || | 842 url.SchemeIsFile() || url.SchemeIsFileSystem(); |
| 815 url.SchemeIsFileSystem(); | |
| 816 if (!sniffable_scheme) { | 843 if (!sniffable_scheme) { |
| 817 should_sniff_counter->Add(1); | 844 should_sniff_counter->Add(1); |
| 818 return false; | 845 return false; |
| 819 } | 846 } |
| 820 | 847 |
| 821 static const char* kSniffableTypes[] = { | 848 static const char* kSniffableTypes[] = { |
| 822 // Many web servers are misconfigured to send text/plain for many | 849 // Many web servers are misconfigured to send text/plain for many |
| 823 // different types of content. | 850 // different types of content. |
| 824 "text/plain", | 851 "text/plain", |
| 825 // We want to sniff application/octet-stream for | 852 // We want to sniff application/octet-stream for |
| 826 // application/x-chrome-extension, but nothing else. | 853 // application/x-chrome-extension, but nothing else. |
| 827 "application/octet-stream", | 854 "application/octet-stream", |
| 828 // XHTML and Atom/RSS feeds are often served as plain xml instead of | 855 // XHTML and Atom/RSS feeds are often served as plain xml instead of |
| 829 // their more specific mime types. | 856 // their more specific mime types. |
| 830 "text/xml", | 857 "text/xml", |
| 831 "application/xml", | 858 "application/xml", |
| 832 // Check for false Microsoft Office MIME types. | 859 // Check for false Microsoft Office MIME types. |
| 833 "application/msword", | 860 "application/msword", |
| 834 "application/vnd.ms-excel", | 861 "application/vnd.ms-excel", |
| 835 "application/vnd.ms-powerpoint", | 862 "application/vnd.ms-powerpoint", |
| 836 "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | 863 "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |
| 837 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | 864 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
| 838 "application/vnd.openxmlformats-officedocument.presentationml.presentation", | 865 "application/" |
| 839 "application/vnd.ms-excel.sheet.macroenabled.12", | 866 "vnd.openxmlformats-officedocument.presentationml.presentation", |
|
davidben
2014/10/10 20:24:16
This is kind of unfortunate. Worth a clang-format
| |
| 840 "application/vnd.ms-word.document.macroenabled.12", | 867 "application/vnd.ms-excel.sheet.macroenabled.12", |
| 841 "application/vnd.ms-powerpoint.presentation.macroenabled.12", | 868 "application/vnd.ms-word.document.macroenabled.12", |
| 842 "application/mspowerpoint", | 869 "application/vnd.ms-powerpoint.presentation.macroenabled.12", |
| 843 "application/msexcel", | 870 "application/mspowerpoint", |
| 844 "application/vnd.ms-word", | 871 "application/msexcel", |
| 845 "application/vnd.ms-word.document.12", | 872 "application/vnd.ms-word", |
| 846 "application/vnd.msword", | 873 "application/vnd.ms-word.document.12", |
| 874 "application/vnd.msword", | |
| 847 }; | 875 }; |
| 848 static base::HistogramBase* counter(NULL); | 876 static base::HistogramBase* counter(NULL); |
| 849 if (!counter) { | 877 if (!counter) { |
| 850 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", | 878 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", |
| 851 arraysize(kSniffableTypes) + 1); | 879 arraysize(kSniffableTypes) + 1); |
| 852 } | 880 } |
| 853 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { | 881 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { |
| 854 if (mime_type == kSniffableTypes[i]) { | 882 if (mime_type == kSniffableTypes[i]) { |
| 855 counter->Add(i); | 883 counter->Add(i); |
| 856 should_sniff_counter->Add(2); | 884 should_sniff_counter->Add(2); |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 925 // We're not interested in sniffing these types for images and the like. | 953 // We're not interested in sniffing these types for images and the like. |
| 926 // Instead, we're looking explicitly for a feed. If we don't find one | 954 // Instead, we're looking explicitly for a feed. If we don't find one |
| 927 // we're done and return early. | 955 // we're done and return early. |
| 928 if (SniffXML(content, content_size, &have_enough_content, result)) | 956 if (SniffXML(content, content_size, &have_enough_content, result)) |
| 929 return true; | 957 return true; |
| 930 return have_enough_content; | 958 return have_enough_content; |
| 931 } | 959 } |
| 932 | 960 |
| 933 // CRX files (Chrome extensions) have a special sniffing algorithm. It is | 961 // CRX files (Chrome extensions) have a special sniffing algorithm. It is |
| 934 // tighter than the others because we don't have to match legacy behavior. | 962 // tighter than the others because we don't have to match legacy behavior. |
| 935 if (SniffCRX(content, content_size, url, type_hint, | 963 if (SniffCRX( |
| 936 &have_enough_content, result)) | 964 content, content_size, url, type_hint, &have_enough_content, result)) |
| 937 return true; | 965 return true; |
| 938 | 966 |
| 939 // Check the file extension and magic numbers to see if this is an Office | 967 // Check the file extension and magic numbers to see if this is an Office |
| 940 // document. This needs to be checked before the general magic numbers | 968 // document. This needs to be checked before the general magic numbers |
| 941 // because zip files and Office documents (OOXML) have the same magic number. | 969 // because zip files and Office documents (OOXML) have the same magic number. |
| 942 if (SniffForOfficeDocs(content, content_size, url, | 970 if (SniffForOfficeDocs( |
| 943 &have_enough_content, result)) | 971 content, content_size, url, &have_enough_content, result)) |
| 944 return true; // We've matched a magic number. No more content needed. | 972 return true; // We've matched a magic number. No more content needed. |
| 945 | 973 |
| 946 // We're not interested in sniffing for magic numbers when the type_hint | 974 // We're not interested in sniffing for magic numbers when the type_hint |
| 947 // is application/octet-stream. Time to bail out. | 975 // is application/octet-stream. Time to bail out. |
| 948 if (type_hint == "application/octet-stream") | 976 if (type_hint == "application/octet-stream") |
| 949 return have_enough_content; | 977 return have_enough_content; |
| 950 | 978 |
| 951 // Now we look in our large table of magic numbers to see if we can find | 979 // Now we look in our large table of magic numbers to see if we can find |
| 952 // anything that matches the content. | 980 // anything that matches the content. |
| 953 if (SniffForMagicNumbers(content, content_size, | 981 if (SniffForMagicNumbers(content, content_size, &have_enough_content, result)) |
| 954 &have_enough_content, result)) | |
| 955 return true; // We've matched a magic number. No more content needed. | 982 return true; // We've matched a magic number. No more content needed. |
| 956 | 983 |
| 957 return have_enough_content; | 984 return have_enough_content; |
| 958 } | 985 } |
| 959 | 986 |
| 960 bool SniffMimeTypeFromLocalData(const char* content, | 987 bool SniffMimeTypeFromLocalData(const char* content, |
| 961 size_t size, | 988 size_t size, |
| 962 std::string* result) { | 989 std::string* result) { |
| 963 // First check the extra table. | 990 // First check the extra table. |
| 964 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, | 991 if (CheckForMagicNumbers(content, |
| 965 arraysize(kExtraMagicNumbers), NULL, result)) | 992 size, |
| 993 kExtraMagicNumbers, | |
| 994 arraysize(kExtraMagicNumbers), | |
| 995 NULL, | |
| 996 result)) | |
| 966 return true; | 997 return true; |
| 967 // Finally check the original table. | 998 // Finally check the original table. |
| 968 return CheckForMagicNumbers(content, size, kMagicNumbers, | 999 return CheckForMagicNumbers( |
| 969 arraysize(kMagicNumbers), NULL, result); | 1000 content, size, kMagicNumbers, arraysize(kMagicNumbers), NULL, result); |
| 970 } | 1001 } |
| 971 | 1002 |
| 972 } // namespace net | 1003 } // namespace net |
| OLD | NEW |