| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 249 return true; | 249 return true; |
| 250 } | 250 } |
| 251 return false; | 251 return false; |
| 252 } | 252 } |
| 253 | 253 |
| 254 static bool CheckForMagicNumbers(const char* content, size_t size, | 254 static bool CheckForMagicNumbers(const char* content, size_t size, |
| 255 const MagicNumber* magic, size_t magic_len, | 255 const MagicNumber* magic, size_t magic_len, |
| 256 Histogram* counter, std::string* result) { | 256 Histogram* counter, std::string* result) { |
| 257 for (size_t i = 0; i < magic_len; ++i) { | 257 for (size_t i = 0; i < magic_len; ++i) { |
| 258 if (MatchMagicNumber(content, size, &(magic[i]), result)) { | 258 if (MatchMagicNumber(content, size, &(magic[i]), result)) { |
| 259 counter->Add(static_cast<int>(i)); | 259 if (counter) counter->Add(static_cast<int>(i)); |
| 260 return true; | 260 return true; |
| 261 } | 261 } |
| 262 } | 262 } |
| 263 return false; | 263 return false; |
| 264 } | 264 } |
| 265 | 265 |
| 266 static bool SniffForHTML(const char* content, size_t size, | 266 static bool SniffForHTML(const char* content, size_t size, |
| 267 std::string* result) { | 267 std::string* result) { |
| 268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, | 268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, |
| 269 // but with some modifications to better match the HTML5 spec. | 269 // but with some modifications to better match the HTML5 spec. |
| (...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 431 } | 431 } |
| 432 } | 432 } |
| 433 if (mime_type.find('/') == std::string::npos) { | 433 if (mime_type.find('/') == std::string::npos) { |
| 434 // Firefox rejects a mime type if it does not contain a slash | 434 // Firefox rejects a mime type if it does not contain a slash |
| 435 counter.Add(arraysize(kUnknownMimeTypes)); | 435 counter.Add(arraysize(kUnknownMimeTypes)); |
| 436 return true; | 436 return true; |
| 437 } | 437 } |
| 438 return false; | 438 return false; |
| 439 } | 439 } |
| 440 | 440 |
| 441 // Sniff a crx (chrome extension) file. |
| 442 static bool SniffCRX(const char* content, size_t content_size, const GURL& url, |
| 443 const std::string& type_hint, std::string* result) { |
| 444 static SnifferHistogram counter("mime_sniffer.kSniffCRX", 3); |
| 445 |
| 446 // Technically, the crx magic number is just Cr24, but the bytes after that |
| 447 // are a version number which changes infrequently. Including it in the |
| 448 // sniffing gives us less room for error. If the version number ever changes, |
| 449 // we can just add an entry to this list. |
| 450 // |
| 451 // TODO(aa): If we ever have another magic number, we'll want to pass a |
| 452 // histogram into CheckForMagicNumbers(), below, to see which one matched. |
| 453 const struct MagicNumber kCRXMagicNumbers[] = { |
| 454 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00") |
| 455 }; |
| 456 |
| 457 // Only consider files that have the extension ".crx". |
| 458 const char kCRXExtension[] = ".crx"; |
| 459 const int kExtensionLength = arraysize(kCRXExtension) - 1; // ignore null |
| 460 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) == |
| 461 url.path().size() - kExtensionLength) { |
| 462 counter.Add(1); |
| 463 } else { |
| 464 return false; |
| 465 } |
| 466 |
| 467 if (CheckForMagicNumbers(content, content_size, |
| 468 kCRXMagicNumbers, arraysize(kCRXMagicNumbers), |
| 469 NULL, result)) { |
| 470 counter.Add(2); |
| 471 } else { |
| 472 return false; |
| 473 } |
| 474 |
| 475 return true; |
| 476 } |
| 477 |
| 441 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { | 478 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |
| 442 static SnifferHistogram should_sniff_counter( | 479 static SnifferHistogram should_sniff_counter( |
| 443 "mime_sniffer.ShouldSniffMimeType2", 3); | 480 "mime_sniffer.ShouldSniffMimeType2", 3); |
| 444 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP | 481 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP |
| 445 bool sniffable_scheme = url.is_empty() || | 482 bool sniffable_scheme = url.is_empty() || |
| 446 url.SchemeIs("http") || | 483 url.SchemeIs("http") || |
| 447 url.SchemeIs("https") || | 484 url.SchemeIs("https") || |
| 448 url.SchemeIs("ftp"); | 485 url.SchemeIs("ftp"); |
| 449 if (!sniffable_scheme) { | 486 if (!sniffable_scheme) { |
| 450 should_sniff_counter.Add(1); | 487 should_sniff_counter.Add(1); |
| 451 return false; | 488 return false; |
| 452 } | 489 } |
| 453 | 490 |
| 454 static const char* kSniffableTypes[] = { | 491 static const char* kSniffableTypes[] = { |
| 455 // Many web servers are misconfigured to send text/plain for many | 492 // Many web servers are misconfigured to send text/plain for many |
| 456 // different types of content. | 493 // different types of content. |
| 457 "text/plain", | 494 "text/plain", |
| 458 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml | 495 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml |
| 459 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, | 496 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, |
| 460 // Opera 9, and IE do. | 497 // Opera 9, and IE do. |
| 461 "application/octet-stream", | 498 "application/octet-stream", |
| 462 // XHTML and Atom/RSS feeds are often served as plain xml instead of | 499 // XHTML and Atom/RSS feeds are often served as plain xml instead of |
| 463 // their more specific mime types. | 500 // their more specific mime types. |
| 464 "text/xml", | 501 "text/xml", |
| 465 "application/xml", | 502 "application/xml", |
| 466 }; | 503 }; |
| 467 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2", | 504 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2", |
| 468 arraysize(kSniffableTypes) + 1); | 505 arraysize(kSniffableTypes) + 1); |
| 469 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { | 506 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { |
| 470 if (mime_type == kSniffableTypes[i]) { | 507 if (mime_type == kSniffableTypes[i]) { |
| 471 counter.Add(i); | 508 counter.Add(i); |
| 472 should_sniff_counter.Add(2); | 509 should_sniff_counter.Add(2); |
| 473 return true; | 510 return true; |
| 474 } | 511 } |
| 475 } | 512 } |
| 476 if (IsUnknownMimeType(mime_type)) { | 513 if (IsUnknownMimeType(mime_type)) { |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 528 // If we have plain XML, sniff XML subtypes. | 565 // If we have plain XML, sniff XML subtypes. |
| 529 if (type_hint == "text/xml" || type_hint == "application/xml") { | 566 if (type_hint == "text/xml" || type_hint == "application/xml") { |
| 530 // We're not interested in sniffing these types for images and the like. | 567 // We're not interested in sniffing these types for images and the like. |
| 531 // Instead, we're looking explicitly for a feed. If we don't find one we're | 568 // Instead, we're looking explicitly for a feed. If we don't find one we're |
| 532 // done and return early. | 569 // done and return early. |
| 533 if (SniffXML(content, content_size, result)) | 570 if (SniffXML(content, content_size, result)) |
| 534 return true; | 571 return true; |
| 535 return content_size >= kMaxBytesToSniff; | 572 return content_size >= kMaxBytesToSniff; |
| 536 } | 573 } |
| 537 | 574 |
| 575 // CRX files (chrome extensions) have a special sniffing algorithm. It is |
| 576 // tighter than the others because we don't have to match legacy behavior. |
| 577 if (SniffCRX(content, content_size, url, type_hint, result)) |
| 578 return true; |
| 579 |
| 538 // Now we look in our large table of magic numbers to see if we can find | 580 // Now we look in our large table of magic numbers to see if we can find |
| 539 // anything that matches the content. | 581 // anything that matches the content. |
| 540 if (SniffForMagicNumbers(content, content_size, result)) | 582 if (SniffForMagicNumbers(content, content_size, result)) |
| 541 return true; // We've matched a magic number. No more content needed. | 583 return true; // We've matched a magic number. No more content needed. |
| 542 | 584 |
| 543 // Having failed thus far, we're willing to override unknown mime types and | 585 // Having failed thus far, we're willing to override unknown mime types and |
| 544 // text/plain. | 586 // text/plain. |
| 545 if (hint_is_unknown_mime_type || hint_is_text_plain) { | 587 if (hint_is_unknown_mime_type || hint_is_text_plain) { |
| 546 if (looks_binary) | 588 if (looks_binary) |
| 547 result->assign("application/octet-stream"); | 589 result->assign("application/octet-stream"); |
| 548 else | 590 else |
| 549 result->assign("text/plain"); | 591 result->assign("text/plain"); |
| 550 // We could change our mind if a binary-looking byte appears later in | 592 // We could change our mind if a binary-looking byte appears later in |
| 551 // the content, so we only have enough content if we have the max. | 593 // the content, so we only have enough content if we have the max. |
| 552 return content_size >= kMaxBytesToSniff; | 594 return content_size >= kMaxBytesToSniff; |
| 553 } | 595 } |
| 554 | 596 |
| 555 return have_enough_content; | 597 return have_enough_content; |
| 556 } | 598 } |
| 557 | 599 |
| 558 } // namespace net | 600 } // namespace net |
| OLD | NEW |