| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 // URL has a GIF extension) | 73 // URL has a GIF extension) |
| 74 // * Opera 9: Render as GIF | 74 // * Opera 9: Render as GIF |
| 75 // | 75 // |
| 76 // We used to render as GIF here, but the problem is that some sites want to | 76 // We used to render as GIF here, but the problem is that some sites want to |
| 77 // trigger downloads by sending application/octet-stream (even though they | 77 // trigger downloads by sending application/octet-stream (even though they |
| 78 // should be sending Content-Disposition: attachment). Although it is safe | 78 // should be sending Content-Disposition: attachment). Although it is safe |
| 79 // to render as GIF from a security perspective, we actually get better | 79 // to render as GIF from a security perspective, we actually get better |
| 80 // compatibility if we don't sniff from application/octet-stream at all. | 80 // compatibility if we don't sniff from application/octet-stream at all. |
| 81 // => Chrome: Download as application/octet-stream | 81 // => Chrome: Download as application/octet-stream |
| 82 // | 82 // |
| 83 // XHTML payload, Content-Type: "text/xml": | |
| 84 // * IE 7: Render as XML | |
| 85 // * Firefox 2: Render as HTML | |
| 86 // * Safari 3: Render as HTML | |
| 87 // * Opera 9: Render as HTML | |
| 88 // The layout tests rely on us rendering this as HTML. | |
| 89 // But we're conservative in XHTML detection, as this runs afoul of the | |
| 90 // "don't detect dangerous mime types" rule. | |
| 91 // | |
| 92 // Note that our definition of HTML payload is much stricter than IE's | 83 // Note that our definition of HTML payload is much stricter than IE's |
| 93 // definition and roughly the same as Firefox's definition. | 84 // definition and roughly the same as Firefox's definition. |
| 94 | 85 |
| 95 #include <stdint.h> | 86 #include <stdint.h> |
| 96 #include <string> | 87 #include <string> |
| 97 | 88 |
| 98 #include "net/base/mime_sniffer.h" | 89 #include "net/base/mime_sniffer.h" |
| 99 | 90 |
| 100 #include "base/logging.h" | 91 #include "base/logging.h" |
| 101 #include "base/strings/string_util.h" | 92 #include "base/strings/string_util.h" |
| (...skipping 433 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 535 *result = "application/octet-stream"; | 526 *result = "application/octet-stream"; |
| 536 } | 527 } |
| 537 | 528 |
| 538 // We have enough information to determine if this was a Microsoft Office | 529 // We have enough information to determine if this was a Microsoft Office |
| 539 // document or not, so sniffing is completed. | 530 // document or not, so sniffing is completed. |
| 540 return true; | 531 return true; |
| 541 } | 532 } |
| 542 | 533 |
| 543 // Magic strings used to recognize XML-based content types | 534 // Magic strings used to recognize XML-based content types |
| 544 static const MagicNumber kMagicXML[] = { | 535 static const MagicNumber kMagicXML[] = { |
| 545 // We want to be very conservative in interpreting text/xml content as | |
| 546 // XHTML -- we just want to sniff enough to make unit tests pass. | |
| 547 // So we match explicitly on this, and don't match other ways of writing | |
| 548 // it in semantically-equivalent ways. | |
| 549 MAGIC_STRING("application/xhtml+xml", | |
| 550 "<html xmlns=\"http://www.w3.org/1999/xhtml\""), | |
| 551 MAGIC_STRING("application/atom+xml", "<feed"), | 536 MAGIC_STRING("application/atom+xml", "<feed"), |
| 552 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 | 537 MAGIC_STRING("application/rss+xml", "<rss"), // UTF-8 |
| 553 }; | 538 }; |
| 554 | 539 |
| 555 // Returns true and sets result if the content appears to contain XHTML or a | 540 // Returns true and sets result if the content appears to contain XHTML or a |
| 556 // feed. | 541 // feed. |
| 557 // Clears have_enough_content if more data could possibly change the result. | 542 // Clears have_enough_content if more data could possibly change the result. |
| 558 // | 543 // |
| 559 // TODO(evanm): this is similar but more conservative than what Safari does, | 544 // TODO(evanm): this is similar but more conservative than what Safari does, |
| 560 // while HTML5 has a different recommendation -- what should we do? | 545 // while HTML5 has a different recommendation -- what should we do? |
| (...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 878 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); | 863 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
| 879 for (size_t i = 0; i < size; ++i) { | 864 for (size_t i = 0; i < size; ++i) { |
| 880 uint8_t byte = static_cast<uint8_t>(content[i]); | 865 uint8_t byte = static_cast<uint8_t>(content[i]); |
| 881 if (byte < 0x20 && (kBinaryBits & (1u << byte))) | 866 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
| 882 return true; | 867 return true; |
| 883 } | 868 } |
| 884 return false; | 869 return false; |
| 885 } | 870 } |
| 886 | 871 |
| 887 } // namespace net | 872 } // namespace net |
| OLD | NEW |