OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 528 matching lines...) | |
539 // document or not, so sniffing is completed. | 539 // document or not, so sniffing is completed. |
540 return true; | 540 return true; |
541 } | 541 } |
542 | 542 |
543 // Magic strings for sniffing XML subtypes (XHTML, Atom, RSS) | 543 // Magic strings for sniffing XML subtypes (XHTML, Atom, RSS) |
544 static const MagicNumber kMagicXML[] = { | 544 static const MagicNumber kMagicXML[] = { |
545 // We want to be very conservative in interpreting text/xml content as | 545 // We want to be very conservative in interpreting text/xml content as |
546 // XHTML -- we just want to sniff enough to make unit tests pass. | 546 // XHTML -- we just want to sniff enough to make unit tests pass. |
547 // So we match explicitly on this exact string, and don't match other, | 547 // So we match explicitly on this exact string, and don't match other, |
548 // semantically-equivalent ways of writing it. | 548 // semantically-equivalent ways of writing it. |
549 MAGIC_STRING("application/xhtml+xml", | 549 MAGIC_STRING("application/xhtml+xml", |
tkent
2017/05/15 07:25:32
I think just removing this entry would be another
| |
550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), | 550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), |
551 MAGIC_STRING("application/atom+xml", "<feed"), | 551 MAGIC_STRING("application/atom+xml", "<feed"), |
552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 | 552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 |
553 }; | 553 }; |
554 | 554 |
555 static const MagicNumber kMagicXMLForApplicationXML[] = { | |
556 MAGIC_STRING("application/atom+xml", "<feed"), | |
557 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 | |
558 }; | |
559 | |
555 // Returns true and sets result if the content appears to contain XHTML or a | 560 // Returns true and sets result if the content appears to contain XHTML or a |
556 // feed. | 561 // feed. |
557 // Clears have_enough_content if more data could possibly change the result. | 562 // Clears have_enough_content if more data could possibly change the result. |
558 // | 563 // |
559 // TODO(evanm): this is similar but more conservative than what Safari does, | 564 // TODO(evanm): this is similar but more conservative than what Safari does, |
560 // while HTML5 has a different recommendation -- what should we do? | 565 // while HTML5 has a different recommendation -- what should we do? |
561 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset | 566 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset |
562 // of ASCII -- do we care? | 567 // of ASCII -- do we care? |
563 static bool SniffXML(const char* content, | 568 static bool SniffXML(const char* content, |
564 size_t size, | 569 size_t size, |
570 const std::string& type_hint, | |
565 bool* have_enough_content, | 571 bool* have_enough_content, |
566 std::string* result) { | 572 std::string* result) { |
567 // We allow at most 300 bytes of content before we expect the opening tag. | 573 // We allow at most 300 bytes of content before we expect the opening tag. |
568 *have_enough_content &= TruncateSize(300, &size); | 574 *have_enough_content &= TruncateSize(300, &size); |
569 const char* pos = content; | 575 const char* pos = content; |
570 const char* const end = content + size; | 576 const char* const end = content + size; |
571 | 577 |
572 // This loop iterates through tag-looking offsets in the file. | 578 // This loop iterates through tag-looking offsets in the file. |
573 // We want to skip XML processing instructions (of the form "<?xml ...") | 579 // We want to skip XML processing instructions (of the form "<?xml ...") |
574 // and stop at the first "plain" tag, then make a decision on the mime-type | 580 // and stop at the first "plain" tag, then make a decision on the mime-type |
(...skipping 18 matching lines...) | |
593 continue; | 599 continue; |
594 } else if ((pos + kDocTypePrefixLength <= end) && | 600 } else if ((pos + kDocTypePrefixLength <= end) && |
595 base::EqualsCaseInsensitiveASCII( | 601 base::EqualsCaseInsensitiveASCII( |
596 base::StringPiece(pos, kDocTypePrefixLength), | 602 base::StringPiece(pos, kDocTypePrefixLength), |
597 base::StringPiece(kDocTypePrefix, kDocTypePrefixLength))) { | 603 base::StringPiece(kDocTypePrefix, kDocTypePrefixLength))) { |
598 // Skip DOCTYPE declarations. | 604 // Skip DOCTYPE declarations. |
599 ++pos; | 605 ++pos; |
600 continue; | 606 continue; |
601 } | 607 } |
602 | 608 |
603 if (CheckForMagicNumbers(pos, end - pos, kMagicXML, arraysize(kMagicXML), | 609 if (type_hint == "application/xml") { |
604 result)) | 610 if (CheckForMagicNumbers(pos, end - pos, kMagicXMLForApplicationXML, |
605 return true; | 611 arraysize(kMagicXMLForApplicationXML), result)) |
612 return true; | |
613 } else { | |
614 if (CheckForMagicNumbers(pos, end - pos, kMagicXML, arraysize(kMagicXML), | |
615 result)) | |
616 return true; | |
617 } | |
606 | 618 |
607 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult | 619 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult |
608 // to identify. | 620 // to identify. |
609 | 621 |
610 // If we get here, we've hit an initial tag that hasn't matched one of the | 622 // If we get here, we've hit an initial tag that hasn't matched one of the |
611 // above tests. Abort. | 623 // above tests. Abort. |
612 return true; | 624 return true; |
613 } | 625 } |
614 | 626 |
615 // We iterated too far without finding a start tag. | 627 // We iterated too far without finding a start tag. |
(...skipping 199 matching lines...) | |
815 return have_enough_content; | 827 return have_enough_content; |
816 } | 828 } |
817 } | 829 } |
818 } | 830 } |
819 | 831 |
820 // If we have plain XML, sniff XML subtypes. | 832 // If we have plain XML, sniff XML subtypes. |
821 if (type_hint == "text/xml" || type_hint == "application/xml") { | 833 if (type_hint == "text/xml" || type_hint == "application/xml") { |
822 // We're not interested in sniffing these types for images and the like. | 834 // We're not interested in sniffing these types for images and the like. |
823 // Instead, we're looking explicitly for a feed. If we don't find one | 835 // Instead, we're looking explicitly for a feed. If we don't find one |
824 // we're done and return early. | 836 // we're done and return early. |
825 if (SniffXML(content, content_size, &have_enough_content, result)) | 837 if (SniffXML(content, content_size, type_hint, &have_enough_content, |
838 result)) | |
826 return true; | 839 return true; |
827 return have_enough_content; | 840 return have_enough_content; |
828 } | 841 } |
829 | 842 |
830 // CRX files (Chrome extensions) have a special sniffing algorithm. It is | 843 // CRX files (Chrome extensions) have a special sniffing algorithm. It is |
831 // tighter than the others because we don't have to match legacy behavior. | 844 // tighter than the others because we don't have to match legacy behavior. |
832 if (SniffCRX(content, content_size, url, type_hint, | 845 if (SniffCRX(content, content_size, url, type_hint, |
833 &have_enough_content, result)) | 846 &have_enough_content, result)) |
834 return true; | 847 return true; |
835 | 848 |
(...skipping 42 matching lines...) | |
878 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); | 891 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
879 for (size_t i = 0; i < size; ++i) { | 892 for (size_t i = 0; i < size; ++i) { |
880 uint8_t byte = static_cast<uint8_t>(content[i]); | 893 uint8_t byte = static_cast<uint8_t>(content[i]); |
881 if (byte < 0x20 && (kBinaryBits & (1u << byte))) | 894 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
882 return true; | 895 return true; |
883 } | 896 } |
884 return false; | 897 return false; |
885 } | 898 } |
886 | 899 |
887 } // namespace net | 900 } // namespace net |
OLD | NEW |