| OLD | NEW | 
|    1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |    1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 
|    2 // Use of this source code is governed by a BSD-style license that can be |    2 // Use of this source code is governed by a BSD-style license that can be | 
|    3 // found in the LICENSE file. |    3 // found in the LICENSE file. | 
|    4  |    4  | 
|    5 // Detecting mime types is a tricky business because we need to balance |    5 // Detecting mime types is a tricky business because we need to balance | 
|    6 // compatibility concerns with security issues.  Here is a survey of how other |    6 // compatibility concerns with security issues.  Here is a survey of how other | 
|    7 // browsers behave and then a description of how we intend to behave. |    7 // browsers behave and then a description of how we intend to behave. | 
|    8 // |    8 // | 
|    9 // HTML payload, no Content-Type header: |    9 // HTML payload, no Content-Type header: | 
|   10 // * IE 7: Render as HTML |   10 // * IE 7: Render as HTML | 
| (...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  301   *have_enough_content &= TruncateSize(512, &size); |  301   *have_enough_content &= TruncateSize(512, &size); | 
|  302  |  302  | 
|  303   // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, |  303   // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, | 
|  304   // but with some modifications to better match the HTML5 spec. |  304   // but with some modifications to better match the HTML5 spec. | 
|  305   const char* const end = content + size; |  305   const char* const end = content + size; | 
|  306   const char* pos; |  306   const char* pos; | 
|  307   for (pos = content; pos < end; ++pos) { |  307   for (pos = content; pos < end; ++pos) { | 
|  308     if (!IsAsciiWhitespace(*pos)) |  308     if (!IsAsciiWhitespace(*pos)) | 
|  309       break; |  309       break; | 
|  310   } |  310   } | 
|  311   static scoped_refptr<base::Histogram> counter = |  311   scoped_refptr<base::Histogram> counter = | 
|  312       UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", |  312       UMASnifferHistogramGet("mime_sniffer.kSniffableTags2", | 
|  313                              arraysize(kSniffableTags)); |  313                              arraysize(kSniffableTags)); | 
|  314   // |pos| now points to first non-whitespace character (or at end). |  314   // |pos| now points to first non-whitespace character (or at end). | 
|  315   return CheckForMagicNumbers(pos, end - pos, |  315   return CheckForMagicNumbers(pos, end - pos, | 
|  316                               kSniffableTags, arraysize(kSniffableTags), |  316                               kSniffableTags, arraysize(kSniffableTags), | 
|  317                               counter.get(), result); |  317                               counter.get(), result); | 
|  318 } |  318 } | 
|  319  |  319  | 
|  320 // Returns true and sets result if the content matches any of kMagicNumbers. |  320 // Returns true and sets result if the content matches any of kMagicNumbers. | 
|  321 // Clears have_enough_content if more data could possibly change the result. |  321 // Clears have_enough_content if more data could possibly change the result. | 
|  322 static bool SniffForMagicNumbers(const char* content, |  322 static bool SniffForMagicNumbers(const char* content, | 
|  323                                  size_t size, |  323                                  size_t size, | 
|  324                                  bool* have_enough_content, |  324                                  bool* have_enough_content, | 
|  325                                  std::string* result) { |  325                                  std::string* result) { | 
|  326   *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); |  326   *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); | 
|  327  |  327  | 
|  328   // Check our big table of Magic Numbers |  328   // Check our big table of Magic Numbers | 
|  329   static scoped_refptr<base::Histogram> counter = |  329   scoped_refptr<base::Histogram> counter = | 
|  330       UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", |  330       UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2", | 
|  331                              arraysize(kMagicNumbers)); |  331                              arraysize(kMagicNumbers)); | 
|  332   return CheckForMagicNumbers(content, size, |  332   return CheckForMagicNumbers(content, size, | 
|  333                               kMagicNumbers, arraysize(kMagicNumbers), |  333                               kMagicNumbers, arraysize(kMagicNumbers), | 
|  334                               counter.get(), result); |  334                               counter.get(), result); | 
|  335 } |  335 } | 
|  336  |  336  | 
|  337 // Magic numbers used when sniffing XML content |  337 // Magic numbers used when sniffing XML content | 
|  338 static const MagicNumber kMagicXML[] = { |  338 static const MagicNumber kMagicXML[] = { | 
|  339   // We want to be very conservative in interpreting text/xml content as |  339   // We want to be very conservative in interpreting text/xml content as | 
| (...skipping 20 matching lines...) Expand all  Loading... | 
|  360                      std::string* result) { |  360                      std::string* result) { | 
|  361   // We allow at most 300 bytes of content before we expect the opening tag. |  361   // We allow at most 300 bytes of content before we expect the opening tag. | 
|  362   *have_enough_content &= TruncateSize(300, &size); |  362   *have_enough_content &= TruncateSize(300, &size); | 
|  363   const char* pos = content; |  363   const char* pos = content; | 
|  364   const char* const end = content + size; |  364   const char* const end = content + size; | 
|  365  |  365  | 
|  366   // This loop iterates through tag-looking offsets in the file. |  366   // This loop iterates through tag-looking offsets in the file. | 
|  367   // We want to skip XML processing instructions (of the form "<?xml ...") |  367   // We want to skip XML processing instructions (of the form "<?xml ...") | 
|  368   // and stop at the first "plain" tag, then make a decision on the mime-type |  368   // and stop at the first "plain" tag, then make a decision on the mime-type | 
|  369   // based on the name (or possibly attributes) of that tag. |  369   // based on the name (or possibly attributes) of that tag. | 
|  370   static scoped_refptr<base::Histogram> counter = |  370   scoped_refptr<base::Histogram> counter = | 
|  371       UMASnifferHistogramGet("mime_sniffer.kMagicXML2", |  371       UMASnifferHistogramGet("mime_sniffer.kMagicXML2", | 
|  372                              arraysize(kMagicXML)); |  372                              arraysize(kMagicXML)); | 
|  373   const int kMaxTagIterations = 5; |  373   const int kMaxTagIterations = 5; | 
|  374   for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { |  374   for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { | 
|  375     pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); |  375     pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); | 
|  376     if (!pos) |  376     if (!pos) | 
|  377       return false; |  377       return false; | 
|  378  |  378  | 
|  379     if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { |  379     if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { | 
|  380       // Skip XML declarations. |  380       // Skip XML declarations. | 
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  444   // There is no consensus about exactly how to sniff for binary content. |  444   // There is no consensus about exactly how to sniff for binary content. | 
|  445   // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. |  445   // * IE 7: Don't sniff for binary looking bytes, but trust the file extension. | 
|  446   // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. |  446   // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte. | 
|  447   // Here, we side with FF, but with a smaller buffer. This size was chosen |  447   // Here, we side with FF, but with a smaller buffer. This size was chosen | 
|  448   // because it is small enough to comfortably fit into a single packet (after |  448   // because it is small enough to comfortably fit into a single packet (after | 
|  449   // allowing for headers) and yet large enough to account for binary formats |  449   // allowing for headers) and yet large enough to account for binary formats | 
|  450   // that have a significant amount of ASCII at the beginning (crbug.com/15314). |  450   // that have a significant amount of ASCII at the beginning (crbug.com/15314). | 
|  451   const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); |  451   const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); | 
|  452  |  452  | 
|  453   // First, we look for a BOM. |  453   // First, we look for a BOM. | 
|  454   static scoped_refptr<base::Histogram> counter = |  454   scoped_refptr<base::Histogram> counter = | 
|  455       UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", |  455       UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2", | 
|  456                              arraysize(kByteOrderMark)); |  456                              arraysize(kByteOrderMark)); | 
|  457   std::string unused; |  457   std::string unused; | 
|  458   if (CheckForMagicNumbers(content, size, |  458   if (CheckForMagicNumbers(content, size, | 
|  459                            kByteOrderMark, arraysize(kByteOrderMark), |  459                            kByteOrderMark, arraysize(kByteOrderMark), | 
|  460                            counter.get(), &unused)) { |  460                            counter.get(), &unused)) { | 
|  461     // If there is a BOM, we think the buffer is not binary. |  461     // If there is a BOM, we think the buffer is not binary. | 
|  462     result->assign("text/plain"); |  462     result->assign("text/plain"); | 
|  463     return false; |  463     return false; | 
|  464   } |  464   } | 
| (...skipping 21 matching lines...) Expand all  Loading... | 
|  486   static const char* kUnknownMimeTypes[] = { |  486   static const char* kUnknownMimeTypes[] = { | 
|  487     // Empty mime types are as unknown as they get. |  487     // Empty mime types are as unknown as they get. | 
|  488     "", |  488     "", | 
|  489     // The unknown/unknown type is popular and uninformative |  489     // The unknown/unknown type is popular and uninformative | 
|  490     "unknown/unknown", |  490     "unknown/unknown", | 
|  491     // The second most popular unknown mime type is application/unknown |  491     // The second most popular unknown mime type is application/unknown | 
|  492     "application/unknown", |  492     "application/unknown", | 
|  493     // Firefox rejects a mime type if it is exactly */* |  493     // Firefox rejects a mime type if it is exactly */* | 
|  494     "*/*", |  494     "*/*", | 
|  495   }; |  495   }; | 
|  496   static scoped_refptr<base::Histogram> counter = |  496   scoped_refptr<base::Histogram> counter = | 
|  497       UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", |  497       UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2", | 
|  498                              arraysize(kUnknownMimeTypes) + 1); |  498                              arraysize(kUnknownMimeTypes) + 1); | 
|  499   for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { |  499   for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { | 
|  500     if (mime_type == kUnknownMimeTypes[i]) { |  500     if (mime_type == kUnknownMimeTypes[i]) { | 
|  501       counter->Add(i); |  501       counter->Add(i); | 
|  502       return true; |  502       return true; | 
|  503     } |  503     } | 
|  504   } |  504   } | 
|  505   if (mime_type.find('/') == std::string::npos) { |  505   if (mime_type.find('/') == std::string::npos) { | 
|  506     // Firefox rejects a mime type if it does not contain a slash |  506     // Firefox rejects a mime type if it does not contain a slash | 
|  507     counter->Add(arraysize(kUnknownMimeTypes)); |  507     counter->Add(arraysize(kUnknownMimeTypes)); | 
|  508     return true; |  508     return true; | 
|  509   } |  509   } | 
|  510   return false; |  510   return false; | 
|  511 } |  511 } | 
|  512  |  512  | 
|  513 // Returns true and sets result if the content appears to be a crx (chrome |  513 // Returns true and sets result if the content appears to be a crx (chrome | 
|  514 // extension) file. |  514 // extension) file. | 
|  515 // Clears have_enough_content if more data could possibly change the result. |  515 // Clears have_enough_content if more data could possibly change the result. | 
|  516 static bool SniffCRX(const char* content, |  516 static bool SniffCRX(const char* content, | 
|  517                      size_t size, |  517                      size_t size, | 
|  518                      const GURL& url, |  518                      const GURL& url, | 
|  519                      const std::string& type_hint, |  519                      const std::string& type_hint, | 
|  520                      bool* have_enough_content, |  520                      bool* have_enough_content, | 
|  521                      std::string* result) { |  521                      std::string* result) { | 
|  522   static scoped_refptr<base::Histogram> counter = |  522   scoped_refptr<base::Histogram> counter = | 
|  523       UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); |  523       UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3); | 
|  524  |  524  | 
|  525   // Technically, the crx magic number is just Cr24, but the bytes after that |  525   // Technically, the crx magic number is just Cr24, but the bytes after that | 
|  526   // are a version number which changes infrequently. Including it in the |  526   // are a version number which changes infrequently. Including it in the | 
|  527   // sniffing gives us less room for error. If the version number ever changes, |  527   // sniffing gives us less room for error. If the version number ever changes, | 
|  528   // we can just add an entry to this list. |  528   // we can just add an entry to this list. | 
|  529   // |  529   // | 
|  530   // TODO(aa): If we ever have another magic number, we'll want to pass a |  530   // TODO(aa): If we ever have another magic number, we'll want to pass a | 
|  531   // histogram into CheckForMagicNumbers(), below, to see which one matched. |  531   // histogram into CheckForMagicNumbers(), below, to see which one matched. | 
|  532   static const struct MagicNumber kCRXMagicNumbers[] = { |  532   static const struct MagicNumber kCRXMagicNumbers[] = { | 
| (...skipping 17 matching lines...) Expand all  Loading... | 
|  550                            NULL, result)) { |  550                            NULL, result)) { | 
|  551     counter->Add(2); |  551     counter->Add(2); | 
|  552   } else { |  552   } else { | 
|  553     return false; |  553     return false; | 
|  554   } |  554   } | 
|  555  |  555  | 
|  556   return true; |  556   return true; | 
|  557 } |  557 } | 
|  558  |  558  | 
|  559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { |  559 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { | 
|  560   static scoped_refptr<base::Histogram> should_sniff_counter = |  560   scoped_refptr<base::Histogram> should_sniff_counter = | 
|  561       UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); |  561       UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3); | 
|  562   // We are willing to sniff the mime type for HTTP, HTTPS, FTP, and file URLs, and also when the URL is empty. |  562   // We are willing to sniff the mime type for HTTP, HTTPS, FTP, and file URLs, and also when the URL is empty. | 
|  563   bool sniffable_scheme = url.is_empty() || |  563   bool sniffable_scheme = url.is_empty() || | 
|  564                           url.SchemeIs("http") || |  564                           url.SchemeIs("http") || | 
|  565                           url.SchemeIs("https") || |  565                           url.SchemeIs("https") || | 
|  566                           url.SchemeIs("ftp") || |  566                           url.SchemeIs("ftp") || | 
|  567                           url.SchemeIsFile(); |  567                           url.SchemeIsFile(); | 
|  568   if (!sniffable_scheme) { |  568   if (!sniffable_scheme) { | 
|  569     should_sniff_counter->Add(1); |  569     should_sniff_counter->Add(1); | 
|  570     return false; |  570     return false; | 
|  571   } |  571   } | 
|  572  |  572  | 
|  573   static const char* kSniffableTypes[] = { |  573   static const char* kSniffableTypes[] = { | 
|  574     // Many web servers are misconfigured to send text/plain for many |  574     // Many web servers are misconfigured to send text/plain for many | 
|  575     // different types of content. |  575     // different types of content. | 
|  576     "text/plain", |  576     "text/plain", | 
|  577     // We want to sniff application/octet-stream for |  577     // We want to sniff application/octet-stream for | 
|  578     // application/x-chrome-extension, but nothing else. |  578     // application/x-chrome-extension, but nothing else. | 
|  579     "application/octet-stream", |  579     "application/octet-stream", | 
|  580     // XHTML and Atom/RSS feeds are often served as plain xml instead of |  580     // XHTML and Atom/RSS feeds are often served as plain xml instead of | 
|  581     // their more specific mime types. |  581     // their more specific mime types. | 
|  582     "text/xml", |  582     "text/xml", | 
|  583     "application/xml", |  583     "application/xml", | 
|  584   }; |  584   }; | 
|  585   static scoped_refptr<base::Histogram> counter = |  585   scoped_refptr<base::Histogram> counter = | 
|  586       UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", |  586       UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2", | 
|  587                              arraysize(kSniffableTypes) + 1); |  587                              arraysize(kSniffableTypes) + 1); | 
|  588   for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { |  588   for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { | 
|  589     if (mime_type == kSniffableTypes[i]) { |  589     if (mime_type == kSniffableTypes[i]) { | 
|  590       counter->Add(i); |  590       counter->Add(i); | 
|  591       should_sniff_counter->Add(2); |  591       should_sniff_counter->Add(2); | 
|  592       return true; |  592       return true; | 
|  593     } |  593     } | 
|  594   } |  594   } | 
|  595   if (IsUnknownMimeType(mime_type)) { |  595   if (IsUnknownMimeType(mime_type)) { | 
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  671   // Now we look in our large table of magic numbers to see if we can find |  671   // Now we look in our large table of magic numbers to see if we can find | 
|  672   // anything that matches the content. |  672   // anything that matches the content. | 
|  673   if (SniffForMagicNumbers(content, content_size, |  673   if (SniffForMagicNumbers(content, content_size, | 
|  674                            &have_enough_content, result)) |  674                            &have_enough_content, result)) | 
|  675     return true;  // We've matched a magic number.  No more content needed. |  675     return true;  // We've matched a magic number.  No more content needed. | 
|  676  |  676  | 
|  677   return have_enough_content; |  677   return have_enough_content; | 
|  678 } |  678 } | 
|  679  |  679  | 
|  680 }  // namespace net |  680 }  // namespace net | 
| OLD | NEW |