Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(618)

Side by Side Diff: net/base/mime_sniffer.cc

Issue 28046: Use string for Histogram names since these are all ASCII anyway wide-characte... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/file_stream_win.cc ('k') | net/base/sdch_filter.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Detecting mime types is a tricky business because we need to balance 5 // Detecting mime types is a tricky business because we need to balance
6 // compatibility concerns with security issues. Here is a survey of how other 6 // compatibility concerns with security issues. Here is a survey of how other
7 // browsers behave and then a description of how we intend to behave. 7 // browsers behave and then a description of how we intend to behave.
8 // 8 //
9 // HTML payload, no Content-Type header: 9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML 10 // * IE 7: Render as HTML
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 #include "base/histogram.h" 96 #include "base/histogram.h"
97 #include "base/logging.h" 97 #include "base/logging.h"
98 #include "base/string_util.h" 98 #include "base/string_util.h"
99 #include "googleurl/src/gurl.h" 99 #include "googleurl/src/gurl.h"
100 #include "net/base/mime_util.h" 100 #include "net/base/mime_util.h"
101 101
102 namespace { 102 namespace {
103 103
104 class SnifferHistogram : public LinearHistogram { 104 class SnifferHistogram : public LinearHistogram {
105 public: 105 public:
106 SnifferHistogram(const wchar_t* name, int array_size) 106 SnifferHistogram(const char* name, int array_size)
107 : LinearHistogram(name, 0, array_size - 1, array_size) { 107 : LinearHistogram(name, 0, array_size - 1, array_size) {
108 SetFlags(kUmaTargetedHistogramFlag); 108 SetFlags(kUmaTargetedHistogramFlag);
109 } 109 }
110 }; 110 };
111 111
112 } // namespace 112 } // namespace
113 113
114 namespace net { 114 namespace net {
115 115
116 // We aren't interested in looking at more than 512 bytes of content 116 // We aren't interested in looking at more than 512 bytes of content
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
266 static bool SniffForHTML(const char* content, size_t size, 266 static bool SniffForHTML(const char* content, size_t size,
267 std::string* result) { 267 std::string* result) {
268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags, 268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,
269 // but with some modifications to better match the HTML5 spec. 269 // but with some modifications to better match the HTML5 spec.
270 const char* const end = content + size; 270 const char* const end = content + size;
271 const char* pos; 271 const char* pos;
272 for (pos = content; pos < end; ++pos) { 272 for (pos = content; pos < end; ++pos) {
273 if (!IsAsciiWhitespace(*pos)) 273 if (!IsAsciiWhitespace(*pos))
274 break; 274 break;
275 } 275 }
276 static SnifferHistogram counter(L"mime_sniffer.kSniffableTags2", 276 static SnifferHistogram counter("mime_sniffer.kSniffableTags2",
277 arraysize(kSniffableTags)); 277 arraysize(kSniffableTags));
278 // |pos| now points to first non-whitespace character (or at end). 278 // |pos| now points to first non-whitespace character (or at end).
279 return CheckForMagicNumbers(pos, end - pos, 279 return CheckForMagicNumbers(pos, end - pos,
280 kSniffableTags, arraysize(kSniffableTags), 280 kSniffableTags, arraysize(kSniffableTags),
281 &counter, result); 281 &counter, result);
282 } 282 }
283 283
284 static bool SniffForMagicNumbers(const char* content, size_t size, 284 static bool SniffForMagicNumbers(const char* content, size_t size,
285 std::string* result) { 285 std::string* result) {
286 // Check our big table of Magic Numbers 286 // Check our big table of Magic Numbers
287 static SnifferHistogram counter(L"mime_sniffer.kMagicNumbers2", 287 static SnifferHistogram counter("mime_sniffer.kMagicNumbers2",
288 arraysize(kMagicNumbers)); 288 arraysize(kMagicNumbers));
289 return CheckForMagicNumbers(content, size, 289 return CheckForMagicNumbers(content, size,
290 kMagicNumbers, arraysize(kMagicNumbers), 290 kMagicNumbers, arraysize(kMagicNumbers),
291 &counter, result); 291 &counter, result);
292 } 292 }
293 293
294 // Magic strings used to sniff XML-based content 294 // Magic strings used to sniff XML-based content
295 static const MagicNumber kMagicXML[] = { 295 static const MagicNumber kMagicXML[] = {
296 // We want to be very conservative in interpreting text/xml content as 296 // We want to be very conservative in interpreting text/xml content as
297 // XHTML -- we just want to sniff enough to make unit tests pass. 297 // XHTML -- we just want to sniff enough to make unit tests pass.
(...skipping 15 matching lines...) Expand all
313 // We allow at most kFeedAllowedHeaderBytes bytes of content before we 313 // We allow at most kFeedAllowedHeaderBytes bytes of content before we
314 // expect the opening tag. 314 // expect the opening tag.
315 const size_t kFeedAllowedHeaderBytes = 300; 315 const size_t kFeedAllowedHeaderBytes = 300;
316 const char* const end = content + std::min(size, kFeedAllowedHeaderBytes); 316 const char* const end = content + std::min(size, kFeedAllowedHeaderBytes);
317 const char* pos = content; 317 const char* pos = content;
318 318
319 // This loop iterates through tag-looking offsets in the file. 319 // This loop iterates through tag-looking offsets in the file.
320 // We want to skip XML processing instructions (of the form "<?xml ...") 320 // We want to skip XML processing instructions (of the form "<?xml ...")
321 // and stop at the first "plain" tag, then make a decision on the mime-type 321 // and stop at the first "plain" tag, then make a decision on the mime-type
322 // based on the name (or possibly attributes) of that tag. 322 // based on the name (or possibly attributes) of that tag.
323 static SnifferHistogram counter(L"mime_sniffer.kMagicXML2", 323 static SnifferHistogram counter("mime_sniffer.kMagicXML2",
324 arraysize(kMagicXML)); 324 arraysize(kMagicXML));
325 const int kMaxTagIterations = 5; 325 const int kMaxTagIterations = 5;
326 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { 326 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {
327 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); 327 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));
328 if (!pos) 328 if (!pos)
329 return false; 329 return false;
330 330
331 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) { 331 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {
332 // Skip XML declarations. 332 // Skip XML declarations.
333 ++pos; 333 ++pos;
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF 380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF 381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF 382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF 383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF 384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF 385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF
386 }; 386 };
387 387
388 static bool LooksBinary(const char* content, size_t size) { 388 static bool LooksBinary(const char* content, size_t size) {
389 // First, we look for a BOM. 389 // First, we look for a BOM.
390 static SnifferHistogram counter(L"mime_sniffer.kByteOrderMark2", 390 static SnifferHistogram counter("mime_sniffer.kByteOrderMark2",
391 arraysize(kByteOrderMark)); 391 arraysize(kByteOrderMark));
392 std::string unused; 392 std::string unused;
393 if (CheckForMagicNumbers(content, size, 393 if (CheckForMagicNumbers(content, size,
394 kByteOrderMark, arraysize(kByteOrderMark), 394 kByteOrderMark, arraysize(kByteOrderMark),
395 &counter, &unused)) { 395 &counter, &unused)) {
396 // If there is BOM, we think the buffer is not binary. 396 // If there is BOM, we think the buffer is not binary.
397 return false; 397 return false;
398 } 398 }
399 399
400 // Next we look to see if any of the bytes "look binary." 400 // Next we look to see if any of the bytes "look binary."
(...skipping 13 matching lines...) Expand all
414 static const char* kUnknownMimeTypes[] = { 414 static const char* kUnknownMimeTypes[] = {
415 // Empty mime types are as unknown as they get. 415 // Empty mime types are as unknown as they get.
416 "", 416 "",
417 // The unknown/unknown type is popular and uninformative 417 // The unknown/unknown type is popular and uninformative
418 "unknown/unknown", 418 "unknown/unknown",
419 // The second most popular unknown mime type is application/unknown 419 // The second most popular unknown mime type is application/unknown
420 "application/unknown", 420 "application/unknown",
421 // Firefox rejects a mime type if it is exactly */* 421 // Firefox rejects a mime type if it is exactly */*
422 "*/*", 422 "*/*",
423 }; 423 };
424 static SnifferHistogram counter(L"mime_sniffer.kUnknownMimeTypes2", 424 static SnifferHistogram counter("mime_sniffer.kUnknownMimeTypes2",
425 arraysize(kUnknownMimeTypes) + 1); 425 arraysize(kUnknownMimeTypes) + 1);
426 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) { 426 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {
427 if (mime_type == kUnknownMimeTypes[i]) { 427 if (mime_type == kUnknownMimeTypes[i]) {
428 counter.Add(i); 428 counter.Add(i);
429 return true; 429 return true;
430 } 430 }
431 } 431 }
432 if (mime_type.find('/') == std::string::npos) { 432 if (mime_type.find('/') == std::string::npos) {
433 // Firefox rejects a mime type if it does not contain a slash 433 // Firefox rejects a mime type if it does not contain a slash
434 counter.Add(arraysize(kUnknownMimeTypes)); 434 counter.Add(arraysize(kUnknownMimeTypes));
435 return true; 435 return true;
436 } 436 }
437 return false; 437 return false;
438 } 438 }
439 439
440 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { 440 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {
441 static SnifferHistogram should_sniff_counter( 441 static SnifferHistogram should_sniff_counter(
442 L"mime_sniffer.ShouldSniffMimeType2", 3); 442 "mime_sniffer.ShouldSniffMimeType2", 3);
443 // We are willing to sniff the mime type for HTTP, HTTPS, FTP, and empty URLs 443 // We are willing to sniff the mime type for HTTP, HTTPS, FTP, and empty URLs
444 bool sniffable_scheme = url.is_empty() || 444 bool sniffable_scheme = url.is_empty() ||
445 url.SchemeIs("http") || 445 url.SchemeIs("http") ||
446 url.SchemeIs("https") || 446 url.SchemeIs("https") ||
447 url.SchemeIs("ftp"); 447 url.SchemeIs("ftp");
448 if (!sniffable_scheme) { 448 if (!sniffable_scheme) {
449 should_sniff_counter.Add(1); 449 should_sniff_counter.Add(1);
450 return false; 450 return false;
451 } 451 }
452 452
453 static const char* kSniffableTypes[] = { 453 static const char* kSniffableTypes[] = {
454 // Many web servers are misconfigured to send text/plain for many 454 // Many web servers are misconfigured to send text/plain for many
455 // different types of content. 455 // different types of content.
456 "text/plain", 456 "text/plain",
457 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml 457 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml
458 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3, 458 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3,
459 // Opera 9, and IE do. 459 // Opera 9, and IE do.
460 "application/octet-stream", 460 "application/octet-stream",
461 // XHTML and Atom/RSS feeds are often served as plain xml instead of 461 // XHTML and Atom/RSS feeds are often served as plain xml instead of
462 // their more specific mime types. 462 // their more specific mime types.
463 "text/xml", 463 "text/xml",
464 "application/xml", 464 "application/xml",
465 }; 465 };
466 static SnifferHistogram counter(L"mime_sniffer.kSniffableTypes2", 466 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2",
467 arraysize(kSniffableTypes) + 1); 467 arraysize(kSniffableTypes) + 1);
468 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) { 468 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {
469 if (mime_type == kSniffableTypes[i]) { 469 if (mime_type == kSniffableTypes[i]) {
470 counter.Add(i); 470 counter.Add(i);
471 should_sniff_counter.Add(2); 471 should_sniff_counter.Add(2);
472 return true; 472 return true;
473 } 473 }
474 } 474 }
475 if (IsUnknownMimeType(mime_type)) { 475 if (IsUnknownMimeType(mime_type)) {
476 // The web server didn't specify a content type or specified a mime 476 // The web server didn't specify a content type or specified a mime
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
548 result->assign("text/plain"); 548 result->assign("text/plain");
549 // We could change our mind if a binary-looking byte appears later in 549 // We could change our mind if a binary-looking byte appears later in
550 // the content, so we only have enough content if we have the max. 550 // the content, so we only have enough content if we have the max.
551 return content_size >= kMaxBytesToSniff; 551 return content_size >= kMaxBytesToSniff;
552 } 552 }
553 553
554 return have_enough_content; 554 return have_enough_content;
555 } 555 }
556 556
557 } // namespace net 557 } // namespace net
558
OLD NEW
« no previous file with comments | « net/base/file_stream_win.cc ('k') | net/base/sdch_filter.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698