OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 // URL has a GIF extension) | 73 // URL has a GIF extension) |
74 // * Opera 9: Render as GIF | 74 // * Opera 9: Render as GIF |
75 // | 75 // |
76 // We used to render as GIF here, but the problem is that some sites want to | 76 // We used to render as GIF here, but the problem is that some sites want to |
77 // trigger downloads by sending application/octet-stream (even though they | 77 // trigger downloads by sending application/octet-stream (even though they |
78 // should be sending Content-Disposition: attachment). Although it is safe | 78 // should be sending Content-Disposition: attachment). Although it is safe |
79 // to render as GIF from a security perspective, we actually get better | 79 // to render as GIF from a security perspective, we actually get better |
80 // compatibility if we don't sniff from application/octet-stream at all. | 80 // compatibility if we don't sniff from application/octet-stream at all. |
81 // => Chrome: Download as application/octet-stream | 81 // => Chrome: Download as application/octet-stream |
82 // | 82 // |
83 // XHTML payload, Content-Type: "text/xml": | |
84 // * IE 7: Render as XML | |
85 // * Firefox 2: Render as HTML | |
86 // * Safari 3: Render as HTML | |
87 // * Opera 9: Render as HTML | |
88 // The layout tests rely on us rendering this as HTML. | |
89 // But we're conservative in XHTML detection, as this runs afoul of the | |
90 // "don't detect dangerous mime types" rule. | |
91 // | |
92 // Note that our definition of HTML payload is much stricter than IE's | 83 // Note that our definition of HTML payload is much stricter than IE's |
93 // definition and roughly the same as Firefox's definition. | 84 // definition and roughly the same as Firefox's definition. |
94 | 85 |
95 #include <stdint.h> | 86 #include <stdint.h> |
96 #include <string> | 87 #include <string> |
97 | 88 |
98 #include "net/base/mime_sniffer.h" | 89 #include "net/base/mime_sniffer.h" |
99 | 90 |
100 #include "base/logging.h" | 91 #include "base/logging.h" |
101 #include "base/strings/string_util.h" | 92 #include "base/strings/string_util.h" |
(...skipping 433 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
535 *result = "application/octet-stream"; | 526 *result = "application/octet-stream"; |
536 } | 527 } |
537 | 528 |
538 // We have enough information to determine if this was a Microsoft Office | 529 // We have enough information to determine if this was a Microsoft Office |
539 // document or not, so sniffing is completed. | 530 // document or not, so sniffing is completed. |
540 return true; | 531 return true; |
541 } | 532 } |
542 | 533 |
543 // Magic strings used to sniff XML-based MIME types | 534 // Magic strings used to sniff XML-based MIME types |
544 static const MagicNumber kMagicXML[] = { | 535 static const MagicNumber kMagicXML[] = { |
545 // We want to be very conservative in interpreting text/xml content as | |
546 // XHTML -- we just want to sniff enough to make unit tests pass. | |
547 // So we match explicitly on this, and don't match other ways of writing | |
548 // it in semantically-equivalent ways. | |
549 MAGIC_STRING("application/xhtml+xml", | |
550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), | |
551 MAGIC_STRING("application/atom+xml", "<feed"), | 536 MAGIC_STRING("application/atom+xml", "<feed"), |
552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 | 537 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 |
553 }; | 538 }; |
554 | 539 |
555 // Returns true and sets result if the content appears to contain XHTML or a | 540 // Returns true and sets result if the content appears to contain XHTML or a |
556 // feed. | 541 // feed. |
557 // Clears have_enough_content if more data could possibly change the result. | 542 // Clears have_enough_content if more data could possibly change the result. |
558 // | 543 // |
559 // TODO(evanm): this is similar but more conservative than what Safari does, | 544 // TODO(evanm): this is similar but more conservative than what Safari does, |
560 // while HTML5 has a different recommendation -- what should we do? | 545 // while HTML5 has a different recommendation -- what should we do? |
(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
878 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); | 863 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
879 for (size_t i = 0; i < size; ++i) { | 864 for (size_t i = 0; i < size; ++i) { |
880 uint8_t byte = static_cast<uint8_t>(content[i]); | 865 uint8_t byte = static_cast<uint8_t>(content[i]); |
881 if (byte < 0x20 && (kBinaryBits & (1u << byte))) | 866 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
882 return true; | 867 return true; |
883 } | 868 } |
884 return false; | 869 return false; |
885 } | 870 } |
886 | 871 |
887 } // namespace net | 872 } // namespace net |
OLD | NEW |