Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 528 matching lines...) | |
| 539 // document or not, so sniffing is completed. | 539 // document or not, so sniffing is completed. |
| 540 return true; | 540 return true; |
| 541 } | 541 } |
| 542 | 542 |
| 543 // Byte order marks | 543 // Byte order marks |
| 544 static const MagicNumber kMagicXML[] = { | 544 static const MagicNumber kMagicXML[] = { |
| 545 // We want to be very conservative in interpreting text/xml content as | 545 // We want to be very conservative in interpreting text/xml content as |
| 546 // XHTML -- we just want to sniff enough to make unit tests pass. | 546 // XHTML -- we just want to sniff enough to make unit tests pass. |
| 547 // So we match explicitly on this, and don't match other ways of writing | 547 // So we match explicitly on this, and don't match other ways of writing |
| 548 // it in semantically-equivalent ways. | 548 // it in semantically-equivalent ways. |
| 549 MAGIC_STRING("application/xhtml+xml", | 549 MAGIC_STRING("application/xhtml+xml", |
| Review comment (tkent, 2017/05/15 07:25:32): I think just removing this entry would be another… | |
| 550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), | 550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), |
| 551 MAGIC_STRING("application/atom+xml", "<feed"), | 551 MAGIC_STRING("application/atom+xml", "<feed"), |
| 552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 | 552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 |
| 553 }; | 553 }; |
| 554 | 554 |
| 555 static const MagicNumber kMagicXMLForApplicationXML[] = { | |
| 556 MAGIC_STRING("application/atom+xml", "<feed"), | |
| 557 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 | |
| 558 }; | |
| 559 | |
| 555 // Returns true and sets result if the content appears to contain XHTML or a | 560 // Returns true and sets result if the content appears to contain XHTML or a |
| 556 // feed. | 561 // feed. |
| 557 // Clears have_enough_content if more data could possibly change the result. | 562 // Clears have_enough_content if more data could possibly change the result. |
| 558 // | 563 // |
| 559 // TODO(evanm): this is similar but more conservative than what Safari does, | 564 // TODO(evanm): this is similar but more conservative than what Safari does, |
| 560 // while HTML5 has a different recommendation -- what should we do? | 565 // while HTML5 has a different recommendation -- what should we do? |
| 561 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset | 566 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset |
| 562 // of ASCII -- do we care? | 567 // of ASCII -- do we care? |
| 563 static bool SniffXML(const char* content, | 568 static bool SniffXML(const char* content, |
| 564 size_t size, | 569 size_t size, |
| 570 const std::string& type_hint, | |
| 565 bool* have_enough_content, | 571 bool* have_enough_content, |
| 566 std::string* result) { | 572 std::string* result) { |
| 567 // We allow at most 300 bytes of content before we expect the opening tag. | 573 // We allow at most 300 bytes of content before we expect the opening tag. |
| 568 *have_enough_content &= TruncateSize(300, &size); | 574 *have_enough_content &= TruncateSize(300, &size); |
| 569 const char* pos = content; | 575 const char* pos = content; |
| 570 const char* const end = content + size; | 576 const char* const end = content + size; |
| 571 | 577 |
| 572 // This loop iterates through tag-looking offsets in the file. | 578 // This loop iterates through tag-looking offsets in the file. |
| 573 // We want to skip XML processing instructions (of the form "<?xml ...") | 579 // We want to skip XML processing instructions (of the form "<?xml ...") |
| 574 // and stop at the first "plain" tag, then make a decision on the mime-type | 580 // and stop at the first "plain" tag, then make a decision on the mime-type |
| (...skipping 18 matching lines...) | |
| 593 continue; | 599 continue; |
| 594 } else if ((pos + kDocTypePrefixLength <= end) && | 600 } else if ((pos + kDocTypePrefixLength <= end) && |
| 595 base::EqualsCaseInsensitiveASCII( | 601 base::EqualsCaseInsensitiveASCII( |
| 596 base::StringPiece(pos, kDocTypePrefixLength), | 602 base::StringPiece(pos, kDocTypePrefixLength), |
| 597 base::StringPiece(kDocTypePrefix, kDocTypePrefixLength))) { | 603 base::StringPiece(kDocTypePrefix, kDocTypePrefixLength))) { |
| 598 // Skip DOCTYPE declarations. | 604 // Skip DOCTYPE declarations. |
| 599 ++pos; | 605 ++pos; |
| 600 continue; | 606 continue; |
| 601 } | 607 } |
| 602 | 608 |
| 603 if (CheckForMagicNumbers(pos, end - pos, kMagicXML, arraysize(kMagicXML), | 609 if (type_hint == "application/xml") { |
| 604 result)) | 610 if (CheckForMagicNumbers(pos, end - pos, kMagicXMLForApplicationXML, |
| 605 return true; | 611 arraysize(kMagicXMLForApplicationXML), result)) |
| 612 return true; | |
| 613 } else { | |
| 614 if (CheckForMagicNumbers(pos, end - pos, kMagicXML, arraysize(kMagicXML), | |
| 615 result)) | |
| 616 return true; | |
| 617 } | |
| 606 | 618 |
| 607 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult | 619 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult |
| 608 // to identify. | 620 // to identify. |
| 609 | 621 |
| 610 // If we get here, we've hit an initial tag that hasn't matched one of the | 622 // If we get here, we've hit an initial tag that hasn't matched one of the |
| 611 // above tests. Abort. | 623 // above tests. Abort. |
| 612 return true; | 624 return true; |
| 613 } | 625 } |
| 614 | 626 |
| 615 // We iterated too far without finding a start tag. | 627 // We iterated too far without finding a start tag. |
| (...skipping 199 matching lines...) | |
| 815 return have_enough_content; | 827 return have_enough_content; |
| 816 } | 828 } |
| 817 } | 829 } |
| 818 } | 830 } |
| 819 | 831 |
| 820 // If we have plain XML, sniff XML subtypes. | 832 // If we have plain XML, sniff XML subtypes. |
| 821 if (type_hint == "text/xml" || type_hint == "application/xml") { | 833 if (type_hint == "text/xml" || type_hint == "application/xml") { |
| 822 // We're not interested in sniffing these types for images and the like. | 834 // We're not interested in sniffing these types for images and the like. |
| 823 // Instead, we're looking explicitly for a feed. If we don't find one | 835 // Instead, we're looking explicitly for a feed. If we don't find one |
| 824 // we're done and return early. | 836 // we're done and return early. |
| 825 if (SniffXML(content, content_size, &have_enough_content, result)) | 837 if (SniffXML(content, content_size, type_hint, &have_enough_content, |
| 838 result)) | |
| 826 return true; | 839 return true; |
| 827 return have_enough_content; | 840 return have_enough_content; |
| 828 } | 841 } |
| 829 | 842 |
| 830 // CRX files (Chrome extensions) have a special sniffing algorithm. It is | 843 // CRX files (Chrome extensions) have a special sniffing algorithm. It is |
| 831 // tighter than the others because we don't have to match legacy behavior. | 844 // tighter than the others because we don't have to match legacy behavior. |
| 832 if (SniffCRX(content, content_size, url, type_hint, | 845 if (SniffCRX(content, content_size, url, type_hint, |
| 833 &have_enough_content, result)) | 846 &have_enough_content, result)) |
| 834 return true; | 847 return true; |
| 835 | 848 |
| (...skipping 42 matching lines...) | |
| 878 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); | 891 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
| 879 for (size_t i = 0; i < size; ++i) { | 892 for (size_t i = 0; i < size; ++i) { |
| 880 uint8_t byte = static_cast<uint8_t>(content[i]); | 893 uint8_t byte = static_cast<uint8_t>(content[i]); |
| 881 if (byte < 0x20 && (kBinaryBits & (1u << byte))) | 894 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
| 882 return true; | 895 return true; |
| 883 } | 896 } |
| 884 return false; | 897 return false; |
| 885 } | 898 } |
| 886 | 899 |
| 887 } // namespace net | 900 } // namespace net |
| OLD | NEW |