| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 107 | 107 |
// The number of content bytes we need to use all our magic numbers.  Feel free
// to increase this number if you add a longer magic number.
// MatchMagicNumber() DCHECKs every entry's |magic_len| against this value, so
// a table entry longer than this limit trips the check in debug builds.
static const size_t kBytesRequiredForMagic = 42;
| 111 | 111 |
// One entry of a magic-number table.  Entries are normally written with the
// MAGIC_NUMBER / MAGIC_MASK / MAGIC_STRING macros, which rely on the field
// order below.
struct MagicNumber {
  // Mime type reported when this entry matches.
  const char* mime_type;
  // Byte pattern expected at the start of the content.
  const char* magic;
  // Number of bytes of |magic| to compare (the macros use sizeof(magic) - 1,
  // i.e. the literal without its terminating '\0').
  size_t magic_len;
  // True if |magic| is compared case-insensitively as a string.
  bool is_string;
  const char* mask;  // if set, must have same length as |magic|
};
| 118 | 119 |
// Declares a binary (case-sensitive, unmasked) table entry.  |magic| must be a
// string literal: its length is taken with sizeof, so embedded '\0' bytes are
// counted.  The macro supplies the trailing comma, so entries in a table are
// written without one.
#define MAGIC_NUMBER(mime_type, magic) \
  { (mime_type), (magic), sizeof(magic)-1, false, NULL },
| 122 |
| 123 template <int MagicSize, int MaskSize> |
| 124 class VerifySizes { |
| 125 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal); |
| 126 public: |
| 127 enum { SIZES = MagicSize }; |
| 128 }; |
| 129 |
// Evaluates to sizeof(magic), but only compiles when sizeof(mask) is the same
// value (enforced by VerifySizes).
#define verified_sizeof(magic, mask) \
  VerifySizes<sizeof(magic), sizeof(mask)>::SIZES
| 132 |
// Declares a binary table entry whose comparison goes through |mask|.
// |magic| and |mask| must be string literals of equal size; a size mismatch
// is rejected at compile time through verified_sizeof().  Like the other
// entry macros, this one supplies the trailing comma.
#define MAGIC_MASK(mime_type, magic, mask) \
  { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) },
| 121 | 135 |
// Magic strings are case insensitive and must not include '\0' characters.
// The entry is marked is_string and carries no mask; the macro supplies the
// trailing comma.
#define MAGIC_STRING(mime_type, magic) \
  { (mime_type), (magic), sizeof(magic)-1, true, NULL },
| 125 | 139 |
| 126 static const MagicNumber kMagicNumbers[] = { | 140 static const MagicNumber kMagicNumbers[] = { |
| 127 // Source: HTML 5 specification | 141 // Source: HTML 5 specification |
| 128 MAGIC_NUMBER("application/pdf", "%PDF-") | 142 MAGIC_NUMBER("application/pdf", "%PDF-") |
| 129 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") | 143 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
| 130 MAGIC_NUMBER("image/gif", "GIF87a") | 144 MAGIC_NUMBER("image/gif", "GIF87a") |
| 131 MAGIC_NUMBER("image/gif", "GIF89a") | 145 MAGIC_NUMBER("image/gif", "GIF89a") |
| 132 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") | 146 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") |
| 133 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") | 147 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") |
| 134 MAGIC_NUMBER("image/bmp", "BM") | 148 MAGIC_NUMBER("image/bmp", "BM") |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 169 // | 183 // |
| 170 // Cons: | 184 // Cons: |
| 171 // * These patterns are fairly weak | 185 // * These patterns are fairly weak |
| 172 // * If we mistakenly decide something is Flash, we will execute it | 186 // * If we mistakenly decide something is Flash, we will execute it |
| 173 // in the origin of an unsuspecting site. This could be a security | 187 // in the origin of an unsuspecting site. This could be a security |
| 174 // vulnerability if the site allows users to upload content. | 188 // vulnerability if the site allows users to upload content. |
| 175 // | 189 // |
| 176 // On balance, we do not include these patterns. | 190 // On balance, we do not include these patterns. |
| 177 }; | 191 }; |
| 178 | 192 |
// Additional magic numbers that SniffMimeTypeFromLocalData() checks before
// falling back to kMagicNumbers.
//
// In the unmasked patterns below a '.' matches any content byte (see
// MagicCmp()), which is how variable fields such as the RIFF chunk size or
// the leading box size before "ftyp" are skipped.
//
// NOTE(review): the more specific "....ftyp3g" / "....ftypavcl" entries are
// listed ahead of the generic "....ftyp" one; this presumably matters because
// the table is scanned in order -- confirm against CheckForMagicNumbers().
static const MagicNumber kExtraMagicNumbers[] = {
  MAGIC_NUMBER("image/x-xbitmap", "#define")
  MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00")
  MAGIC_NUMBER("image/svg+xml", "<?xml_version=")
  MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ")
  MAGIC_NUMBER("video/avi", "RIFF....AVI LIST")
  MAGIC_NUMBER("audio/ogg", "OggS")
  // Masked entries: only the bits set in the third argument are compared.
  MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0")
  MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0")
  MAGIC_NUMBER("video/3gpp", "....ftyp3g")
  MAGIC_NUMBER("video/3gpp", "....ftypavcl")
  MAGIC_NUMBER("video/mp4", "....ftyp")
  MAGIC_NUMBER("video/quicktime", "MOVI")
  MAGIC_NUMBER("application/x-shockwave-flash", "CWS")
  MAGIC_NUMBER("application/x-shockwave-flash", "FWS")
  MAGIC_NUMBER("video/x-flv", "FLV")
};
| 210 |
| 179 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will | 211 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will |
| 180 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is | 212 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is |
| 181 // HTML, but we will not. | 213 // HTML, but we will not. |
| 182 | 214 |
// Declares a case-insensitive "text/html" entry whose pattern is '<' followed
// by |tag|.  |tag| must be a string literal, since it is joined to "<" by
// literal concatenation.
#define MAGIC_HTML_TAG(tag) \
  MAGIC_STRING("text/html", "<" tag)
| 185 | 217 |
| 186 static const MagicNumber kSniffableTags[] = { | 218 static const MagicNumber kSniffableTags[] = { |
| 187 // XML processing directive. Although this is not an HTML mime type, we sniff | 219 // XML processing directive. Although this is not an HTML mime type, we sniff |
| 188 // for this in the HTML phase because text/xml is just as powerful as HTML and | 220 // for this in the HTML phase because text/xml is just as powerful as HTML and |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 223 while (len) { | 255 while (len) { |
| 224 if ((*magic_entry != '.') && (*magic_entry != *content)) | 256 if ((*magic_entry != '.') && (*magic_entry != *content)) |
| 225 return false; | 257 return false; |
| 226 ++magic_entry; | 258 ++magic_entry; |
| 227 ++content; | 259 ++content; |
| 228 --len; | 260 --len; |
| 229 } | 261 } |
| 230 return true; | 262 return true; |
| 231 } | 263 } |
| 232 | 264 |
// Like MagicCmp() except that each byte is ANDed with the corresponding
// |mask| byte before the comparison, because there are some bits we don't
// care about.
//
// |magic_entry|, |content| and |mask| must each provide at least |len|
// readable bytes.  A '.' in |magic_entry| matches any content byte, exactly
// as in MagicCmp().  Returns true if all |len| positions match.
//
// The mask is applied to both the magic byte and the content byte, so bits
// that are cleared in the mask are ignored on both sides.  Masking only the
// content would make an entry whose magic has a bit set outside its mask
// silently unmatchable.  Bytes are compared as unsigned char so the result
// does not depend on whether plain char is signed.
static bool MagicMaskCmp(const char* magic_entry,
                         const char* content,
                         size_t len,
                         const char* mask) {
  for (size_t i = 0; i < len; ++i) {
    // Wildcard position: accept whatever the content holds.
    if (magic_entry[i] == '.')
      continue;

    const unsigned char care_bits = static_cast<unsigned char>(mask[i]);
    const unsigned char expected =
        static_cast<unsigned char>(magic_entry[i]) & care_bits;
    const unsigned char actual =
        static_cast<unsigned char>(content[i]) & care_bits;
    if (expected != actual)
      return false;
  }
  return true;
}
| 281 |
| 282 static bool MatchMagicNumber(const char* content, |
| 283 size_t size, |
| 234 const MagicNumber* magic_entry, | 284 const MagicNumber* magic_entry, |
| 235 std::string* result) { | 285 std::string* result) { |
| 236 const size_t len = magic_entry->magic_len; | 286 const size_t len = magic_entry->magic_len; |
| 237 | 287 |
| 238 // Keep kBytesRequiredForMagic honest. | 288 // Keep kBytesRequiredForMagic honest. |
| 239 DCHECK_LE(len, kBytesRequiredForMagic); | 289 DCHECK_LE(len, kBytesRequiredForMagic); |
| 240 | 290 |
| 241 // To compare with magic strings, we need to compute strlen(content), but | 291 // To compare with magic strings, we need to compute strlen(content), but |
| 242 // content might not actually have a null terminator. In that case, we | 292 // content might not actually have a null terminator. In that case, we |
| 243 // pretend the length is content_size. | 293 // pretend the length is content_size. |
| 244 const char* end = | 294 const char* end = |
| 245 static_cast<const char*>(memchr(content, '\0', size)); | 295 static_cast<const char*>(memchr(content, '\0', size)); |
| 246 const size_t content_strlen = | 296 const size_t content_strlen = |
| 247 (end != NULL) ? static_cast<size_t>(end - content) : size; | 297 (end != NULL) ? static_cast<size_t>(end - content) : size; |
| 248 | 298 |
| 249 bool match = false; | 299 bool match = false; |
| 250 if (magic_entry->is_string) { | 300 if (magic_entry->is_string) { |
| 251 if (content_strlen >= len) { | 301 if (content_strlen >= len) { |
| 252 // String comparisons are case-insensitive | 302 // String comparisons are case-insensitive |
| 253 match = (base::strncasecmp(magic_entry->magic, content, len) == 0); | 303 match = (base::strncasecmp(magic_entry->magic, content, len) == 0); |
| 254 } | 304 } |
| 255 } else { | 305 } else { |
| 256 if (size >= len) | 306 if (size >= len) { |
| 257 match = MagicCmp(magic_entry->magic, content, len); | 307 if (!magic_entry->mask) { |
| 308 match = MagicCmp(magic_entry->magic, content, len); |
| 309 } else { |
| 310 match = MagicMaskCmp(magic_entry->magic, content, len, |
| 311 magic_entry->mask); |
| 312 } |
| 313 } |
| 258 } | 314 } |
| 259 | 315 |
| 260 if (match) { | 316 if (match) { |
| 261 result->assign(magic_entry->mime_type); | 317 result->assign(magic_entry->mime_type); |
| 262 return true; | 318 return true; |
| 263 } | 319 } |
| 264 return false; | 320 return false; |
| 265 } | 321 } |
| 266 | 322 |
| 267 static bool CheckForMagicNumbers(const char* content, size_t size, | 323 static bool CheckForMagicNumbers(const char* content, size_t size, |
| (...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 605 // The web server didn't specify a content type or specified a mime | 661 // The web server didn't specify a content type or specified a mime |
| 606 // type that we ignore. | 662 // type that we ignore. |
| 607 counter->Add(arraysize(kSniffableTypes)); | 663 counter->Add(arraysize(kSniffableTypes)); |
| 608 should_sniff_counter->Add(2); | 664 should_sniff_counter->Add(2); |
| 609 return true; | 665 return true; |
| 610 } | 666 } |
| 611 should_sniff_counter->Add(1); | 667 should_sniff_counter->Add(1); |
| 612 return false; | 668 return false; |
| 613 } | 669 } |
| 614 | 670 |
| 615 bool SniffMimeType(const char* content, size_t content_size, | 671 bool SniffMimeType(const char* content, |
| 616 const GURL& url, const std::string& type_hint, | 672 size_t content_size, |
| 673 const GURL& url, |
| 674 const std::string& type_hint, |
| 617 std::string* result) { | 675 std::string* result) { |
| 618 DCHECK_LT(content_size, 1000000U); // sanity check | 676 DCHECK_LT(content_size, 1000000U); // sanity check |
| 619 DCHECK(content); | 677 DCHECK(content); |
| 620 DCHECK(result); | 678 DCHECK(result); |
| 621 | 679 |
| 622 // By default, we assume we have enough content. | 680 // By default, we assume we have enough content. |
| 623 // Each sniff routine may unset this if it wasn't provided enough content. | 681 // Each sniff routine may unset this if it wasn't provided enough content. |
| 624 bool have_enough_content = true; | 682 bool have_enough_content = true; |
| 625 | 683 |
| 626 // By default, we'll return the type hint. | 684 // By default, we'll return the type hint. |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 679 | 737 |
| 680 // Now we look in our large table of magic numbers to see if we can find | 738 // Now we look in our large table of magic numbers to see if we can find |
| 681 // anything that matches the content. | 739 // anything that matches the content. |
| 682 if (SniffForMagicNumbers(content, content_size, | 740 if (SniffForMagicNumbers(content, content_size, |
| 683 &have_enough_content, result)) | 741 &have_enough_content, result)) |
| 684 return true; // We've matched a magic number. No more content needed. | 742 return true; // We've matched a magic number. No more content needed. |
| 685 | 743 |
| 686 return have_enough_content; | 744 return have_enough_content; |
| 687 } | 745 } |
| 688 | 746 |
| 747 bool SniffMimeTypeFromLocalData(const char* content, |
| 748 size_t size, |
| 749 std::string* result) { |
| 750 // First check the extra table. |
| 751 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, |
| 752 arraysize(kExtraMagicNumbers), NULL, result)) |
| 753 return true; |
| 754 // Finally check the original table. |
| 755 return CheckForMagicNumbers(content, size, kMagicNumbers, |
| 756 arraysize(kMagicNumbers), NULL, result); |
| 757 } |
| 758 |
| 689 } // namespace net | 759 } // namespace net |
| OLD | NEW |