Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(455)

Side by Side Diff: net/base/mime_sniffer.cc

Issue 159345: Implement mimetype sniffing for extensions. (Closed)
Patch Set: rebase Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/common/extensions/extension_unittest.cc ('k') | net/base/mime_sniffer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Detecting mime types is a tricky business because we need to balance 5 // Detecting mime types is a tricky business because we need to balance
6 // compatibility concerns with security issues. Here is a survey of how other 6 // compatibility concerns with security issues. Here is a survey of how other
7 // browsers behave and then a description of how we intend to behave. 7 // browsers behave and then a description of how we intend to behave.
8 // 8 //
9 // HTML payload, no Content-Type header: 9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML 10 // * IE 7: Render as HTML
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 return true; 249 return true;
250 } 250 }
251 return false; 251 return false;
252 } 252 }
253 253
254 static bool CheckForMagicNumbers(const char* content, size_t size, 254 static bool CheckForMagicNumbers(const char* content, size_t size,
255 const MagicNumber* magic, size_t magic_len, 255 const MagicNumber* magic, size_t magic_len,
256 Histogram* counter, std::string* result) { 256 Histogram* counter, std::string* result) {
257 for (size_t i = 0; i < magic_len; ++i) { 257 for (size_t i = 0; i < magic_len; ++i) {
258 if (MatchMagicNumber(content, size, &(magic[i]), result)) { 258 if (MatchMagicNumber(content, size, &(magic[i]), result)) {
259 counter->Add(static_cast<int>(i)); 259 if (counter) counter->Add(static_cast<int>(i));
260 return true; 260 return true;
261 } 261 }
262 } 262 }
263 return false; 263 return false;
264 } 264 }
265 265
266 static bool SniffForHTML(const char* content, size_t size, 266 static bool SniffForHTML(const char* content, size_t size,
267 std::string* result) { 267 std::string* result) {
268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, 268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,
269 // but with some modifications to better match the HTML5 spec. 269 // but with some modifications to better match the HTML5 spec.
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after
431 } 431 }
432 } 432 }
433 if (mime_type.find('/') == std::string::npos) { 433 if (mime_type.find('/') == std::string::npos) {
434 // Firefox rejects a mime type if it does not contain a slash 434 // Firefox rejects a mime type if it does not contain a slash
435 counter.Add(arraysize(kUnknownMimeTypes)); 435 counter.Add(arraysize(kUnknownMimeTypes));
436 return true; 436 return true;
437 } 437 }
438 return false; 438 return false;
439 } 439 }
440 440
441 // Sniff a crx (chrome extension) file.
442 static bool SniffCRX(const char* content, size_t content_size, const GURL& url,
443 const std::string& type_hint, std::string* result) {
444 static SnifferHistogram counter("mime_sniffer.kSniffCRX", 3);
445
446 // Technically, the crx magic number is just Cr24, but the bytes after that
447 // are a version number which changes infrequently. Including it in the
448 // sniffing gives us less room for error. If the version number ever changes,
449 // we can just add an entry to this list.
450 //
451 // TODO(aa): If we ever have another magic number, we'll want to pass a
452 // histogram into CheckForMagicNumbers(), below, to see which one matched.
453 const struct MagicNumber kCRXMagicNumbers[] = {
454 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")
455 };
456
457 // Only consider files that have the extension ".crx".
458 const char kCRXExtension[] = ".crx";
459 const int kExtensionLength = arraysize(kCRXExtension) - 1; // ignore null
460 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) ==
461 url.path().size() - kExtensionLength) {
462 counter.Add(1);
463 } else {
464 return false;
465 }
466
467 if (CheckForMagicNumbers(content, content_size,
468 kCRXMagicNumbers, arraysize(kCRXMagicNumbers),
469 NULL, result)) {
470 counter.Add(2);
471 } else {
472 return false;
473 }
474
475 return true;
476 }
477
441 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { 478 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {
442 static SnifferHistogram should_sniff_counter( 479 static SnifferHistogram should_sniff_counter(
443 "mime_sniffer.ShouldSniffMimeType2", 3); 480 "mime_sniffer.ShouldSniffMimeType2", 3);
444 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP 481 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP
445 bool sniffable_scheme = url.is_empty() || 482 bool sniffable_scheme = url.is_empty() ||
446 url.SchemeIs("http") || 483 url.SchemeIs("http") ||
447 url.SchemeIs("https") || 484 url.SchemeIs("https") ||
448 url.SchemeIs("ftp"); 485 url.SchemeIs("ftp");
449 if (!sniffable_scheme) { 486 if (!sniffable_scheme) {
450 should_sniff_counter.Add(1); 487 should_sniff_counter.Add(1);
451 return false; 488 return false;
452 } 489 }
453 490
454 static const char* kSniffableTypes[] = { 491 static const char* kSniffableTypes[] = {
455 // Many web servers are misconfigured to send text/plain for many 492 // Many web servers are misconfigured to send text/plain for many
456 // different types of content. 493 // different types of content.
457 "text/plain", 494 "text/plain",
458 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml 495 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml
459 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, 496 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3,
460 // Opera 9, and IE do. 497 // Opera 9, and IE do.
461 "application/octet-stream", 498 "application/octet-stream",
462 // XHTML and Atom/RSS feeds are often served as plain xml instead of 499 // XHTML and Atom/RSS feeds are often served as plain xml instead of
463 // their more specific mime types. 500 // their more specific mime types.
464 "text/xml", 501 "text/xml",
465 "application/xml", 502 "application/xml",
466 }; 503 };
467 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2", 504 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2",
468 arraysize(kSniffableTypes) + 1); 505 arraysize(kSniffableTypes) + 1);
469 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { 506 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {
470 if (mime_type == kSniffableTypes[i]) { 507 if (mime_type == kSniffableTypes[i]) {
471 counter.Add(i); 508 counter.Add(i);
472 should_sniff_counter.Add(2); 509 should_sniff_counter.Add(2);
473 return true; 510 return true;
474 } 511 }
475 } 512 }
476 if (IsUnknownMimeType(mime_type)) { 513 if (IsUnknownMimeType(mime_type)) {
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
528 // If we have plain XML, sniff XML subtypes. 565 // If we have plain XML, sniff XML subtypes.
529 if (type_hint == "text/xml" || type_hint == "application/xml") { 566 if (type_hint == "text/xml" || type_hint == "application/xml") {
530 // We're not interested in sniffing these types for images and the like. 567 // We're not interested in sniffing these types for images and the like.
531 // Instead, we're looking explicitly for a feed. If we don't find one we're 568 // Instead, we're looking explicitly for a feed. If we don't find one we're
532 // done and return early. 569 // done and return early.
533 if (SniffXML(content, content_size, result)) 570 if (SniffXML(content, content_size, result))
534 return true; 571 return true;
535 return content_size >= kMaxBytesToSniff; 572 return content_size >= kMaxBytesToSniff;
536 } 573 }
537 574
575 // CRX files (chrome extensions) have a special sniffing algorithm. It is
576 // tighter than the others because we don't have to match legacy behavior.
577 if (SniffCRX(content, content_size, url, type_hint, result))
578 return true;
579
538 // Now we look in our large table of magic numbers to see if we can find 580 // Now we look in our large table of magic numbers to see if we can find
539 // anything that matches the content. 581 // anything that matches the content.
540 if (SniffForMagicNumbers(content, content_size, result)) 582 if (SniffForMagicNumbers(content, content_size, result))
541 return true; // We've matched a magic number. No more content needed. 583 return true; // We've matched a magic number. No more content needed.
542 584
543 // Having failed thus far, we're willing to override unknown mime types and 585 // Having failed thus far, we're willing to override unknown mime types and
544 // text/plain. 586 // text/plain.
545 if (hint_is_unknown_mime_type || hint_is_text_plain) { 587 if (hint_is_unknown_mime_type || hint_is_text_plain) {
546 if (looks_binary) 588 if (looks_binary)
547 result->assign("application/octet-stream"); 589 result->assign("application/octet-stream");
548 else 590 else
549 result->assign("text/plain"); 591 result->assign("text/plain");
550 // We could change our mind if a binary-looking byte appears later in 592 // We could change our mind if a binary-looking byte appears later in
551 // the content, so we only have enough content if we have the max. 593 // the content, so we only have enough content if we have the max.
552 return content_size >= kMaxBytesToSniff; 594 return content_size >= kMaxBytesToSniff;
553 } 595 }
554 596
555 return have_enough_content; 597 return have_enough_content;
556 } 598 }
557 599
558 } // namespace net 600 } // namespace net
OLDNEW
« no previous file with comments | « chrome/common/extensions/extension_unittest.cc ('k') | net/base/mime_sniffer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698