Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Side by Side Diff: net/base/mime_sniffer.cc

Issue 2883833002: net: Do not sniff XHTML content (Closed)
Patch Set: . Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | net/base/mime_sniffer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Detecting mime types is a tricky business because we need to balance 5 // Detecting mime types is a tricky business because we need to balance
6 // compatibility concerns with security issues. Here is a survey of how other 6 // compatibility concerns with security issues. Here is a survey of how other
7 // browsers behave and then a description of how we intend to behave. 7 // browsers behave and then a description of how we intend to behave.
8 // 8 //
9 // HTML payload, no Content-Type header: 9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML 10 // * IE 7: Render as HTML
(...skipping 528 matching lines...) Expand 10 before | Expand all | Expand 10 after
539 // document or not, so sniffing is completed. 539 // document or not, so sniffing is completed.
540 return true; 540 return true;
541 } 541 }
542 542
543 // Byte order marks 543 // Byte order marks
544 static const MagicNumber kMagicXML[] = { 544 static const MagicNumber kMagicXML[] = {
545 // We want to be very conservative in interpreting text/xml content as 545 // We want to be very conservative in interpreting text/xml content as
546 // XHTML -- we just want to sniff enough to make unit tests pass. 546 // XHTML -- we just want to sniff enough to make unit tests pass.
547 // So we match explicitly on this, and don't match other ways of writing 547 // So we match explicitly on this, and don't match other ways of writing
548 // it in semantically-equivalent ways. 548 // it in semantically-equivalent ways.
549 MAGIC_STRING("application/xhtml+xml", 549 MAGIC_STRING("application/xhtml+xml",
tkent 2017/05/15 07:25:32 I think just removing this entry would be another… [comment truncated in collapsed view]
550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), 550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""),
551 MAGIC_STRING("application/atom+xml", "<feed"), 551 MAGIC_STRING("application/atom+xml", "<feed"),
552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8
553 }; 553 };
554 554
555 static const MagicNumber kMagicXMLForApplicationXML[] = {
556 MAGIC_STRING("application/atom+xml", "<feed"),
557 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8
558 };
559
555 // Returns true and sets result if the content appears to contain XHTML or a 560 // Returns true and sets result if the content appears to contain XHTML or a
556 // feed. 561 // feed.
557 // Clears have_enough_content if more data could possibly change the result. 562 // Clears have_enough_content if more data could possibly change the result.
558 // 563 //
559 // TODO(evanm): this is similar but more conservative than what Safari does, 564 // TODO(evanm): this is similar but more conservative than what Safari does,
560 // while HTML5 has a different recommendation -- what should we do? 565 // while HTML5 has a different recommendation -- what should we do?
561 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset 566 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset
562 // of ASCII -- do we care? 567 // of ASCII -- do we care?
563 static bool SniffXML(const char* content, 568 static bool SniffXML(const char* content,
564 size_t size, 569 size_t size,
570 const std::string& type_hint,
565 bool* have_enough_content, 571 bool* have_enough_content,
566 std::string* result) { 572 std::string* result) {
567 // We allow at most 300 bytes of content before we expect the opening tag. 573 // We allow at most 300 bytes of content before we expect the opening tag.
568 *have_enough_content &= TruncateSize(300, &size); 574 *have_enough_content &= TruncateSize(300, &size);
569 const char* pos = content; 575 const char* pos = content;
570 const char* const end = content + size; 576 const char* const end = content + size;
571 577
572 // This loop iterates through tag-looking offsets in the file. 578 // This loop iterates through tag-looking offsets in the file.
573 // We want to skip XML processing instructions (of the form "<?xml ...") 579 // We want to skip XML processing instructions (of the form "<?xml ...")
574 // and stop at the first "plain" tag, then make a decision on the mime-type 580 // and stop at the first "plain" tag, then make a decision on the mime-type
(...skipping 18 matching lines...) Expand all
593 continue; 599 continue;
594 } else if ((pos + kDocTypePrefixLength <= end) && 600 } else if ((pos + kDocTypePrefixLength <= end) &&
595 base::EqualsCaseInsensitiveASCII( 601 base::EqualsCaseInsensitiveASCII(
596 base::StringPiece(pos, kDocTypePrefixLength), 602 base::StringPiece(pos, kDocTypePrefixLength),
597 base::StringPiece(kDocTypePrefix, kDocTypePrefixLength))) { 603 base::StringPiece(kDocTypePrefix, kDocTypePrefixLength))) {
598 // Skip DOCTYPE declarations. 604 // Skip DOCTYPE declarations.
599 ++pos; 605 ++pos;
600 continue; 606 continue;
601 } 607 }
602 608
603 if (CheckForMagicNumbers(pos, end - pos, kMagicXML, arraysize(kMagicXML), 609 if (type_hint == "application/xml") {
604 result)) 610 if (CheckForMagicNumbers(pos, end - pos, kMagicXMLForApplicationXML,
605 return true; 611 arraysize(kMagicXMLForApplicationXML), result))
612 return true;
613 } else {
614 if (CheckForMagicNumbers(pos, end - pos, kMagicXML, arraysize(kMagicXML),
615 result))
616 return true;
617 }
606 618
607 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult 619 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult
608 // to identify. 620 // to identify.
609 621
610 // If we get here, we've hit an initial tag that hasn't matched one of the 622 // If we get here, we've hit an initial tag that hasn't matched one of the
611 // above tests. Abort. 623 // above tests. Abort.
612 return true; 624 return true;
613 } 625 }
614 626
615 // We iterated too far without finding a start tag. 627 // We iterated too far without finding a start tag.
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
815 return have_enough_content; 827 return have_enough_content;
816 } 828 }
817 } 829 }
818 } 830 }
819 831
820 // If we have plain XML, sniff XML subtypes. 832 // If we have plain XML, sniff XML subtypes.
821 if (type_hint == "text/xml" || type_hint == "application/xml") { 833 if (type_hint == "text/xml" || type_hint == "application/xml") {
822 // We're not interested in sniffing these types for images and the like. 834 // We're not interested in sniffing these types for images and the like.
823 // Instead, we're looking explicitly for a feed. If we don't find one 835 // Instead, we're looking explicitly for a feed. If we don't find one
824 // we're done and return early. 836 // we're done and return early.
825 if (SniffXML(content, content_size, &have_enough_content, result)) 837 if (SniffXML(content, content_size, type_hint, &have_enough_content,
838 result))
826 return true; 839 return true;
827 return have_enough_content; 840 return have_enough_content;
828 } 841 }
829 842
830 // CRX files (Chrome extensions) have a special sniffing algorithm. It is 843 // CRX files (Chrome extensions) have a special sniffing algorithm. It is
831 // tighter than the others because we don't have to match legacy behavior. 844 // tighter than the others because we don't have to match legacy behavior.
832 if (SniffCRX(content, content_size, url, type_hint, 845 if (SniffCRX(content, content_size, url, type_hint,
833 &have_enough_content, result)) 846 &have_enough_content, result))
834 return true; 847 return true;
835 848
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
878 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); 891 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b');
879 for (size_t i = 0; i < size; ++i) { 892 for (size_t i = 0; i < size; ++i) {
880 uint8_t byte = static_cast<uint8_t>(content[i]); 893 uint8_t byte = static_cast<uint8_t>(content[i]);
881 if (byte < 0x20 && (kBinaryBits & (1u << byte))) 894 if (byte < 0x20 && (kBinaryBits & (1u << byte)))
882 return true; 895 return true;
883 } 896 }
884 return false; 897 return false;
885 } 898 }
886 899
887 } // namespace net 900 } // namespace net
OLD | NEW
« no previous file with comments | « no previous file | net/base/mime_sniffer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698