| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 338 // content might not actually have a null terminator. In that case, we | 338 // content might not actually have a null terminator. In that case, we |
| 339 // pretend the length is size. | 339 // pretend the length is size. |
| 340 const char* end = static_cast<const char*>(memchr(content, '\0', size)); | 340 const char* end = static_cast<const char*>(memchr(content, '\0', size)); |
| 341 const size_t content_strlen = | 341 const size_t content_strlen = |
| 342 (end != NULL) ? static_cast<size_t>(end - content) : size; | 342 (end != NULL) ? static_cast<size_t>(end - content) : size; |
| 343 | 343 |
| 344 bool match = false; | 344 bool match = false; |
| 345 if (magic_entry.is_string) { | 345 if (magic_entry.is_string) { |
| 346 if (content_strlen >= len) { | 346 if (content_strlen >= len) { |
| 347 // String comparisons are case-insensitive | 347 // String comparisons are case-insensitive |
| 348 match = (base::strncasecmp(magic_entry.magic, content, len) == 0); | 348 match = (strncasecmp(magic_entry.magic, content, len) == 0); |
| 349 } | 349 } |
| 350 } else { | 350 } else { |
| 351 if (size >= len) { | 351 if (size >= len) { |
| 352 if (!magic_entry.mask) { | 352 if (!magic_entry.mask) { |
| 353 match = MagicCmp(magic_entry.magic, content, len); | 353 match = MagicCmp(magic_entry.magic, content, len); |
| 354 } else { | 354 } else { |
| 355 match = MagicMaskCmp(magic_entry.magic, content, len, magic_entry.mask); | 355 match = MagicMaskCmp(magic_entry.magic, content, len, magic_entry.mask); |
| 356 } | 356 } |
| 357 } | 357 } |
| 358 } | 358 } |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 398 std::string* result) { | 398 std::string* result) { |
| 399 // For HTML, we are willing to consider up to 512 bytes. This may be overly | 399 // For HTML, we are willing to consider up to 512 bytes. This may be overly |
| 400 // conservative as IE only considers 256. | 400 // conservative as IE only considers 256. |
| 401 *have_enough_content &= TruncateSize(512, &size); | 401 *have_enough_content &= TruncateSize(512, &size); |
| 402 | 402 |
| 403 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, | 403 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, |
| 404 // but with some modifications to better match the HTML5 spec. | 404 // but with some modifications to better match the HTML5 spec. |
| 405 const char* const end = content + size; | 405 const char* const end = content + size; |
| 406 const char* pos; | 406 const char* pos; |
| 407 for (pos = content; pos < end; ++pos) { | 407 for (pos = content; pos < end; ++pos) { |
| 408 if (!IsAsciiWhitespace(*pos)) | 408 if (!base::IsAsciiWhitespace(*pos)) |
| 409 break; | 409 break; |
| 410 } | 410 } |
| 411 static base::HistogramBase* counter(NULL); | 411 static base::HistogramBase* counter(NULL); |
| 412 if (!counter) { | 412 if (!counter) { |
| 413 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", | 413 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", |
| 414 arraysize(kSniffableTags)); | 414 arraysize(kSniffableTags)); |
| 415 } | 415 } |
| 416 // |pos| now points to first non-whitespace character (or at end). | 416 // |pos| now points to first non-whitespace character (or at end). |
| 417 return CheckForMagicNumbers(pos, end - pos, | 417 return CheckForMagicNumbers(pos, end - pos, |
| 418 kSniffableTags, arraysize(kSniffableTags), | 418 kSniffableTags, arraysize(kSniffableTags), |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 458 OfficeDocType type = DOC_TYPE_NONE; | 458 OfficeDocType type = DOC_TYPE_NONE; |
| 459 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) { | 459 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) { |
| 460 std::string url_path = url.path(); | 460 std::string url_path = url.path(); |
| 461 | 461 |
| 462 if (url_path.length() < kOfficeExtensionTypes[i].extension_len) | 462 if (url_path.length() < kOfficeExtensionTypes[i].extension_len) |
| 463 continue; | 463 continue; |
| 464 | 464 |
| 465 const char* extension = | 465 const char* extension = |
| 466 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; | 466 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; |
| 467 | 467 |
| 468 if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension, | 468 if (0 == strncasecmp(extension, kOfficeExtensionTypes[i].extension, |
| 469 kOfficeExtensionTypes[i].extension_len)) { | 469 kOfficeExtensionTypes[i].extension_len)) { |
| 470 type = kOfficeExtensionTypes[i].doc_type; | 470 type = kOfficeExtensionTypes[i].doc_type; |
| 471 break; | 471 break; |
| 472 } | 472 } |
| 473 } | 473 } |
| 474 | 474 |
| 475 if (type == DOC_TYPE_NONE) | 475 if (type == DOC_TYPE_NONE) |
| 476 return false; | 476 return false; |
| 477 | 477 |
| 478 if (office_version == "CFB") { | 478 if (office_version == "CFB") { |
| 479 switch (type) { | 479 switch (type) { |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 602 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", | 602 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", |
| 603 arraysize(kMagicXML)); | 603 arraysize(kMagicXML)); |
| 604 } | 604 } |
| 605 const int kMaxTagIterations = 5; | 605 const int kMaxTagIterations = 5; |
| 606 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { | 606 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
| 607 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); | 607 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); |
| 608 if (!pos) | 608 if (!pos) |
| 609 return false; | 609 return false; |
| 610 | 610 |
| 611 if ((pos + sizeof("<?xml") - 1 <= end) && | 611 if ((pos + sizeof("<?xml") - 1 <= end) && |
| 612 (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) { | 612 (strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) { |
| 613 // Skip XML declarations. | 613 // Skip XML declarations. |
| 614 ++pos; | 614 ++pos; |
| 615 continue; | 615 continue; |
| 616 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) && | 616 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) && |
| 617 (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == | 617 (strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == 0)) { |
| 618 0)) { | |
| 619 // Skip DOCTYPE declarations. | 618 // Skip DOCTYPE declarations. |
| 620 ++pos; | 619 ++pos; |
| 621 continue; | 620 continue; |
| 622 } | 621 } |
| 623 | 622 |
| 624 if (CheckForMagicNumbers(pos, end - pos, | 623 if (CheckForMagicNumbers(pos, end - pos, |
| 625 kMagicXML, arraysize(kMagicXML), | 624 kMagicXML, arraysize(kMagicXML), |
| 626 counter, result)) | 625 counter, result)) |
| 627 return true; | 626 return true; |
| 628 | 627 |
| (...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 953 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); | 952 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
| 954 for (size_t i = 0; i < size; ++i) { | 953 for (size_t i = 0; i < size; ++i) { |
| 955 uint8 byte = static_cast<uint8>(content[i]); | 954 uint8 byte = static_cast<uint8>(content[i]); |
| 956 if (byte < 0x20 && (kBinaryBits & (1u << byte))) | 955 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
| 957 return true; | 956 return true; |
| 958 } | 957 } |
| 959 return false; | 958 return false; |
| 960 } | 959 } |
| 961 | 960 |
| 962 } // namespace net | 961 } // namespace net |
| OLD | NEW |