| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
| 6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
| 7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
| 8 // | 8 // |
| 9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
| 10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 96 #include "base/histogram.h" | 96 #include "base/histogram.h" |
| 97 #include "base/logging.h" | 97 #include "base/logging.h" |
| 98 #include "base/string_util.h" | 98 #include "base/string_util.h" |
| 99 #include "googleurl/src/gurl.h" | 99 #include "googleurl/src/gurl.h" |
| 100 #include "net/base/mime_util.h" | 100 #include "net/base/mime_util.h" |
| 101 | 101 |
| 102 namespace { | 102 namespace { |
| 103 | 103 |
| 104 class SnifferHistogram : public LinearHistogram { | 104 class SnifferHistogram : public LinearHistogram { |
| 105 public: | 105 public: |
| 106 SnifferHistogram(const wchar_t* name, int array_size) | 106 SnifferHistogram(const char* name, int array_size) |
| 107 : LinearHistogram(name, 0, array_size - 1, array_size) { | 107 : LinearHistogram(name, 0, array_size - 1, array_size) { |
| 108 SetFlags(kUmaTargetedHistogramFlag); | 108 SetFlags(kUmaTargetedHistogramFlag); |
| 109 } | 109 } |
| 110 }; | 110 }; |
| 111 | 111 |
| 112 } // namespace | 112 } // namespace |
| 113 | 113 |
| 114 namespace net { | 114 namespace net { |
| 115 | 115 |
| 116 // We aren't interested in looking at more than 512 bytes of content | 116 // We aren't interested in looking at more than 512 bytes of content |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 266 static bool SniffForHTML(const char* content, size_t size, | 266 static bool SniffForHTML(const char* content, size_t size, |
| 267 std::string* result) { | 267 std::string* result) { |
| 268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, | 268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, |
| 269 // but with some modifications to better match the HTML5 spec. | 269 // but with some modifications to better match the HTML5 spec. |
| 270 const char* const end = content + size; | 270 const char* const end = content + size; |
| 271 const char* pos; | 271 const char* pos; |
| 272 for (pos = content; pos < end; ++pos) { | 272 for (pos = content; pos < end; ++pos) { |
| 273 if (!IsAsciiWhitespace(*pos)) | 273 if (!IsAsciiWhitespace(*pos)) |
| 274 break; | 274 break; |
| 275 } | 275 } |
| 276 static SnifferHistogram counter(L"mime_sniffer.kSniffableTags2", | 276 static SnifferHistogram counter("mime_sniffer.kSniffableTags2", |
| 277 arraysize(kSniffableTags)); | 277 arraysize(kSniffableTags)); |
| 278 // |pos| now points to first non-whitespace character (or at end). | 278 // |pos| now points to first non-whitespace character (or at end). |
| 279 return CheckForMagicNumbers(pos, end - pos, | 279 return CheckForMagicNumbers(pos, end - pos, |
| 280 kSniffableTags, arraysize(kSniffableTags), | 280 kSniffableTags, arraysize(kSniffableTags), |
| 281 &counter, result); | 281 &counter, result); |
| 282 } | 282 } |
| 283 | 283 |
| 284 static bool SniffForMagicNumbers(const char* content, size_t size, | 284 static bool SniffForMagicNumbers(const char* content, size_t size, |
| 285 std::string* result) { | 285 std::string* result) { |
| 286 // Check our big table of Magic Numbers | 286 // Check our big table of Magic Numbers |
| 287 static SnifferHistogram counter(L"mime_sniffer.kMagicNumbers2", | 287 static SnifferHistogram counter("mime_sniffer.kMagicNumbers2", |
| 288 arraysize(kMagicNumbers)); | 288 arraysize(kMagicNumbers)); |
| 289 return CheckForMagicNumbers(content, size, | 289 return CheckForMagicNumbers(content, size, |
| 290 kMagicNumbers, arraysize(kMagicNumbers), | 290 kMagicNumbers, arraysize(kMagicNumbers), |
| 291 &counter, result); | 291 &counter, result); |
| 292 } | 292 } |
| 293 | 293 |
| 294 // Magic numbers for sniffing XML content | 294 // Magic numbers for sniffing XML content |
| 295 static const MagicNumber kMagicXML[] = { | 295 static const MagicNumber kMagicXML[] = { |
| 296 // We want to be very conservative in interpreting text/xml content as | 296 // We want to be very conservative in interpreting text/xml content as |
| 297 // XHTML -- we just want to sniff enough to make unit tests pass. | 297 // XHTML -- we just want to sniff enough to make unit tests pass. |
| (...skipping 15 matching lines...) Expand all Loading... |
| 313 // We allow at most kFeedAllowedHeaderBytes bytes of content before we expect | 313 // We allow at most kFeedAllowedHeaderBytes bytes of content before we expect |
| 314 // the opening tag. | 314 // the opening tag. |
| 315 const size_t kFeedAllowedHeaderBytes = 300; | 315 const size_t kFeedAllowedHeaderBytes = 300; |
| 316 const char* const end = content + std::min(size, kFeedAllowedHeaderBytes); | 316 const char* const end = content + std::min(size, kFeedAllowedHeaderBytes); |
| 317 const char* pos = content; | 317 const char* pos = content; |
| 318 | 318 |
| 319 // This loop iterates through tag-looking offsets in the file. | 319 // This loop iterates through tag-looking offsets in the file. |
| 320 // We want to skip XML processing instructions (of the form "<?xml ...") | 320 // We want to skip XML processing instructions (of the form "<?xml ...") |
| 321 // and stop at the first "plain" tag, then make a decision on the mime-type | 321 // and stop at the first "plain" tag, then make a decision on the mime-type |
| 322 // based on the name (or possibly attributes) of that tag. | 322 // based on the name (or possibly attributes) of that tag. |
| 323 static SnifferHistogram counter(L"mime_sniffer.kMagicXML2", | 323 static SnifferHistogram counter("mime_sniffer.kMagicXML2", |
| 324 arraysize(kMagicXML)); | 324 arraysize(kMagicXML)); |
| 325 const int kMaxTagIterations = 5; | 325 const int kMaxTagIterations = 5; |
| 326 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { | 326 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |
| 327 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); | 327 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); |
| 328 if (!pos) | 328 if (!pos) |
| 329 return false; | 329 return false; |
| 330 | 330 |
| 331 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { | 331 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { |
| 332 // Skip XML declarations. | 332 // Skip XML declarations. |
| 333 ++pos; | 333 ++pos; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF | 380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF |
| 381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF | 381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF |
| 382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF | 382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF |
| 383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF | 383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF |
| 384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF | 384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF |
| 385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF | 385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF |
| 386 }; | 386 }; |
| 387 | 387 |
| 388 static bool LooksBinary(const char* content, size_t size) { | 388 static bool LooksBinary(const char* content, size_t size) { |
| 389 // First, we look for a BOM. | 389 // First, we look for a BOM. |
| 390 static SnifferHistogram counter(L"mime_sniffer.kByteOrderMark2", | 390 static SnifferHistogram counter("mime_sniffer.kByteOrderMark2", |
| 391 arraysize(kByteOrderMark)); | 391 arraysize(kByteOrderMark)); |
| 392 std::string unused; | 392 std::string unused; |
| 393 if (CheckForMagicNumbers(content, size, | 393 if (CheckForMagicNumbers(content, size, |
| 394 kByteOrderMark, arraysize(kByteOrderMark), | 394 kByteOrderMark, arraysize(kByteOrderMark), |
| 395 &counter, &unused)) { | 395 &counter, &unused)) { |
| 396 // If there is BOM, we think the buffer is not binary. | 396 // If there is BOM, we think the buffer is not binary. |
| 397 return false; | 397 return false; |
| 398 } | 398 } |
| 399 | 399 |
| 400 // Next we look to see if any of the bytes "look binary." | 400 // Next we look to see if any of the bytes "look binary." |
| (...skipping 13 matching lines...) Expand all Loading... |
| 414 static const char* kUnknownMimeTypes[] = { | 414 static const char* kUnknownMimeTypes[] = { |
| 415 // Empty mime types are as unknown as they get. | 415 // Empty mime types are as unknown as they get. |
| 416 "", | 416 "", |
| 417 // The unknown/unknown type is popular and uninformative | 417 // The unknown/unknown type is popular and uninformative |
| 418 "unknown/unknown", | 418 "unknown/unknown", |
| 419 // The second most popular unknown mime type is application/unknown | 419 // The second most popular unknown mime type is application/unknown |
| 420 "application/unknown", | 420 "application/unknown", |
| 421 // Firefox rejects a mime type if it is exactly */* | 421 // Firefox rejects a mime type if it is exactly */* |
| 422 "*/*", | 422 "*/*", |
| 423 }; | 423 }; |
| 424 static SnifferHistogram counter(L"mime_sniffer.kUnknownMimeTypes2", | 424 static SnifferHistogram counter("mime_sniffer.kUnknownMimeTypes2", |
| 425 arraysize(kUnknownMimeTypes) + 1); | 425 arraysize(kUnknownMimeTypes) + 1); |
| 426 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { | 426 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { |
| 427 if (mime_type == kUnknownMimeTypes[i]) { | 427 if (mime_type == kUnknownMimeTypes[i]) { |
| 428 counter.Add(i); | 428 counter.Add(i); |
| 429 return true; | 429 return true; |
| 430 } | 430 } |
| 431 } | 431 } |
| 432 if (mime_type.find('/') == std::string::npos) { | 432 if (mime_type.find('/') == std::string::npos) { |
| 433 // Firefox rejects a mime type if it does not contain a slash | 433 // Firefox rejects a mime type if it does not contain a slash |
| 434 counter.Add(arraysize(kUnknownMimeTypes)); | 434 counter.Add(arraysize(kUnknownMimeTypes)); |
| 435 return true; | 435 return true; |
| 436 } | 436 } |
| 437 return false; | 437 return false; |
| 438 } | 438 } |
| 439 | 439 |
| 440 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { | 440 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |
| 441 static SnifferHistogram should_sniff_counter( | 441 static SnifferHistogram should_sniff_counter( |
| 442 L"mime_sniffer.ShouldSniffMimeType2", 3); | 442 "mime_sniffer.ShouldSniffMimeType2", 3); |
| 443 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP | 443 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP |
| 444 bool sniffable_scheme = url.is_empty() || | 444 bool sniffable_scheme = url.is_empty() || |
| 445 url.SchemeIs("http") || | 445 url.SchemeIs("http") || |
| 446 url.SchemeIs("https") || | 446 url.SchemeIs("https") || |
| 447 url.SchemeIs("ftp"); | 447 url.SchemeIs("ftp"); |
| 448 if (!sniffable_scheme) { | 448 if (!sniffable_scheme) { |
| 449 should_sniff_counter.Add(1); | 449 should_sniff_counter.Add(1); |
| 450 return false; | 450 return false; |
| 451 } | 451 } |
| 452 | 452 |
| 453 static const char* kSniffableTypes[] = { | 453 static const char* kSniffableTypes[] = { |
| 454 // Many web servers are misconfigured to send text/plain for many | 454 // Many web servers are misconfigured to send text/plain for many |
| 455 // different types of content. | 455 // different types of content. |
| 456 "text/plain", | 456 "text/plain", |
| 457 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml | 457 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml |
| 458 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, | 458 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, |
| 459 // Opera 9, and IE do. | 459 // Opera 9, and IE do. |
| 460 "application/octet-stream", | 460 "application/octet-stream", |
| 461 // XHTML and Atom/RSS feeds are often served as plain xml instead of | 461 // XHTML and Atom/RSS feeds are often served as plain xml instead of |
| 462 // their more specific mime types. | 462 // their more specific mime types. |
| 463 "text/xml", | 463 "text/xml", |
| 464 "application/xml", | 464 "application/xml", |
| 465 }; | 465 }; |
| 466 static SnifferHistogram counter(L"mime_sniffer.kSniffableTypes2", | 466 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2", |
| 467 arraysize(kSniffableTypes) + 1); | 467 arraysize(kSniffableTypes) + 1); |
| 468 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { | 468 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { |
| 469 if (mime_type == kSniffableTypes[i]) { | 469 if (mime_type == kSniffableTypes[i]) { |
| 470 counter.Add(i); | 470 counter.Add(i); |
| 471 should_sniff_counter.Add(2); | 471 should_sniff_counter.Add(2); |
| 472 return true; | 472 return true; |
| 473 } | 473 } |
| 474 } | 474 } |
| 475 if (IsUnknownMimeType(mime_type)) { | 475 if (IsUnknownMimeType(mime_type)) { |
| 476 // The web server didn't specify a content type or specified a mime | 476 // The web server didn't specify a content type or specified a mime |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 548 result->assign("text/plain"); | 548 result->assign("text/plain"); |
| 549 // We could change our mind if a binary-looking byte appears later in | 549 // We could change our mind if a binary-looking byte appears later in |
| 550 // the content, so we only have enough content if we have the max. | 550 // the content, so we only have enough content if we have the max. |
| 551 return content_size >= kMaxBytesToSniff; | 551 return content_size >= kMaxBytesToSniff; |
| 552 } | 552 } |
| 553 | 553 |
| 554 return have_enough_content; | 554 return have_enough_content; |
| 555 } | 555 } |
| 556 | 556 |
| 557 } // namespace net | 557 } // namespace net |
| 558 | |
| OLD | NEW |