OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
338 // content might not actually have a null terminator. In that case, we | 338 // content might not actually have a null terminator. In that case, we |
339 // pretend the length is content_size. | 339 // pretend the length is content_size. |
340 const char* end = static_cast<const char*>(memchr(content, '\0', size)); | 340 const char* end = static_cast<const char*>(memchr(content, '\0', size)); |
341 const size_t content_strlen = | 341 const size_t content_strlen = |
342 (end != NULL) ? static_cast<size_t>(end - content) : size; | 342 (end != NULL) ? static_cast<size_t>(end - content) : size; |
343 | 343 |
344 bool match = false; | 344 bool match = false; |
345 if (magic_entry.is_string) { | 345 if (magic_entry.is_string) { |
346 if (content_strlen >= len) { | 346 if (content_strlen >= len) { |
347 // String comparisons are case-insensitive | 347 // String comparisons are case-insensitive |
348 match = (base::strncasecmp(magic_entry.magic, content, len) == 0); | 348 match = (strncasecmp(magic_entry.magic, content, len) == 0); |
349 } | 349 } |
350 } else { | 350 } else { |
351 if (size >= len) { | 351 if (size >= len) { |
352 if (!magic_entry.mask) { | 352 if (!magic_entry.mask) { |
353 match = MagicCmp(magic_entry.magic, content, len); | 353 match = MagicCmp(magic_entry.magic, content, len); |
354 } else { | 354 } else { |
355 match = MagicMaskCmp(magic_entry.magic, content, len, magic_entry.mask); | 355 match = MagicMaskCmp(magic_entry.magic, content, len, magic_entry.mask); |
356 } | 356 } |
357 } | 357 } |
358 } | 358 } |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
398 std::string* result) { | 398 std::string* result) { |
399 // For HTML, we are willing to consider up to 512 bytes. This may be overly | 399 // For HTML, we are willing to consider up to 512 bytes. This may be overly |
400 // conservative as IE only considers 256. | 400 // conservative as IE only considers 256. |
401 *have_enough_content &= TruncateSize(512, &size); | 401 *have_enough_content &= TruncateSize(512, &size); |
402 | 402 |
403 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, | 403 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, |
404 // but with some modifications to better match the HTML5 spec. | 404 // but with some modifications to better match the HTML5 spec. |
405 const char* const end = content + size; | 405 const char* const end = content + size; |
406 const char* pos; | 406 const char* pos; |
407 for (pos = content; pos < end; ++pos) { | 407 for (pos = content; pos < end; ++pos) { |
408 if (!IsAsciiWhitespace(*pos)) | 408 if (!base::IsAsciiWhitespace(*pos)) |
409 break; | 409 break; |
410 } | 410 } |
411 static base::HistogramBase* counter(NULL); | 411 static base::HistogramBase* counter(NULL); |
412 if (!counter) { | 412 if (!counter) { |
413 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", | 413 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", |
414 arraysize(kSniffableTags)); | 414 arraysize(kSniffableTags)); |
415 } | 415 } |
416 // |pos| now points to first non-whitespace character (or at end). | 416 // |pos| now points to first non-whitespace character (or at end). |
417 return CheckForMagicNumbers(pos, end - pos, | 417 return CheckForMagicNumbers(pos, end - pos, |
418 kSniffableTags, arraysize(kSniffableTags), | 418 kSniffableTags, arraysize(kSniffableTags), |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
458 OfficeDocType type = DOC_TYPE_NONE; | 458 OfficeDocType type = DOC_TYPE_NONE; |
459 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) { | 459 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) { |
460 std::string url_path = url.path(); | 460 std::string url_path = url.path(); |
461 | 461 |
462 if (url_path.length() < kOfficeExtensionTypes[i].extension_len) | 462 if (url_path.length() < kOfficeExtensionTypes[i].extension_len) |
463 continue; | 463 continue; |
464 | 464 |
465 const char* extension = | 465 const char* extension = |
466 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; | 466 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len]; |
467 | 467 |
468 if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension, | 468 if (0 == strncasecmp(extension, kOfficeExtensionTypes[i].extension, |
469 kOfficeExtensionTypes[i].extension_len)) { | 469 kOfficeExtensionTypes[i].extension_len)) { |
470 type = kOfficeExtensionTypes[i].doc_type; | 470 type = kOfficeExtensionTypes[i].doc_type; |
471 break; | 471 break; |
472 } | 472 } |
473 } | 473 } |
474 | 474 |
475 if (type == DOC_TYPE_NONE) | 475 if (type == DOC_TYPE_NONE) |
476 return false; | 476 return false; |
477 | 477 |
478 if (office_version == "CFB") { | 478 if (office_version == "CFB") { |
479 switch (type) { | 479 switch (type) { |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
602 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", | 602 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2", |
603 arraysize(kMagicXML)); | 603 arraysize(kMagicXML)); |
604 } | 604 } |
605 const int kMaxTagIterations = 5; | 605 const int kMaxTagIterations = 5; |
606 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { | 606 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
607 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); | 607 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); |
608 if (!pos) | 608 if (!pos) |
609 return false; | 609 return false; |
610 | 610 |
611 if ((pos + sizeof("<?xml") - 1 <= end) && | 611 if ((pos + sizeof("<?xml") - 1 <= end) && |
612 (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) { | 612 (strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) { |
613 // Skip XML declarations. | 613 // Skip XML declarations. |
614 ++pos; | 614 ++pos; |
615 continue; | 615 continue; |
616 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) && | 616 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) && |
617 (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == | 617 (strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) == 0)) { |
618 0)) { | |
619 // Skip DOCTYPE declarations. | 618 // Skip DOCTYPE declarations. |
620 ++pos; | 619 ++pos; |
621 continue; | 620 continue; |
622 } | 621 } |
623 | 622 |
624 if (CheckForMagicNumbers(pos, end - pos, | 623 if (CheckForMagicNumbers(pos, end - pos, |
625 kMagicXML, arraysize(kMagicXML), | 624 kMagicXML, arraysize(kMagicXML), |
626 counter, result)) | 625 counter, result)) |
627 return true; | 626 return true; |
628 | 627 |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
953 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); | 952 ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); |
954 for (size_t i = 0; i < size; ++i) { | 953 for (size_t i = 0; i < size; ++i) { |
955 uint8 byte = static_cast<uint8>(content[i]); | 954 uint8 byte = static_cast<uint8>(content[i]); |
956 if (byte < 0x20 && (kBinaryBits & (1u << byte))) | 955 if (byte < 0x20 && (kBinaryBits & (1u << byte))) |
957 return true; | 956 return true; |
958 } | 957 } |
959 return false; | 958 return false; |
960 } | 959 } |
961 | 960 |
962 } // namespace net | 961 } // namespace net |
OLD | NEW |