net/base/mime_sniffer.cc - Issue 28046: Use string for Histogram names since these are all ASCII anyway wide-characte...

Side by Side Diff: net/base/mime_sniffer.cc

Issue 28046: Use string for Histogram names since these are all ASCII anyway wide-characte... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Detecting mime types is a tricky business because we need to balance	5 // Detecting mime types is a tricky business because we need to balance

6 // compatibility concerns with security issues. Here is a survey of how other	6 // compatibility concerns with security issues. Here is a survey of how other

7 // browsers behave and then a description of how we intend to behave.	7 // browsers behave and then a description of how we intend to behave.

8 //	8 //

9 // HTML payload, no Content-Type header:	9 // HTML payload, no Content-Type header:

10 // * IE 7: Render as HTML	10 // * IE 7: Render as HTML

(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
96 #include "base/histogram.h"	96 #include "base/histogram.h"

97 #include "base/logging.h"	97 #include "base/logging.h"

98 #include "base/string_util.h"	98 #include "base/string_util.h"

99 #include "googleurl/src/gurl.h"	99 #include "googleurl/src/gurl.h"

100 #include "net/base/mime_util.h"	100 #include "net/base/mime_util.h"

101	101

102 namespace {	102 namespace {

103	103

104 class SnifferHistogram : public LinearHistogram {	104 class SnifferHistogram : public LinearHistogram {

105 public:	105 public:

106 SnifferHistogram(const wchar_t* name, int array_size)	106 SnifferHistogram(const char* name, int array_size)

107 : LinearHistogram(name, 0, array_size - 1, array_size) {	107 : LinearHistogram(name, 0, array_size - 1, array_size) {

108 SetFlags(kUmaTargetedHistogramFlag);	108 SetFlags(kUmaTargetedHistogramFlag);

109 }	109 }

110 };	110 };

111	111

112 } // namespace	112 } // namespace

113	113

114 namespace net {	114 namespace net {

115	115

116 // We aren't interested in looking at more than 512 bytes of content	116 // We aren't interested in looking at more than 512 bytes of content

(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
266 static bool SniffForHTML(const char* content, size_t size,	266 static bool SniffForHTML(const char* content, size_t size,

267 std::string* result) {	267 std::string* result) {

268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,	268 // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,

269 // but with some modifications to better match the HTML5 spec.	269 // but with some modifications to better match the HTML5 spec.

270 const char* const end = content + size;	270 const char* const end = content + size;

271 const char* pos;	271 const char* pos;

272 for (pos = content; pos < end; ++pos) {	272 for (pos = content; pos < end; ++pos) {

273 if (!IsAsciiWhitespace(*pos))	273 if (!IsAsciiWhitespace(*pos))

274 break;	274 break;

275 }	275 }

276 static SnifferHistogram counter(L"mime_sniffer.kSniffableTags2",	276 static SnifferHistogram counter("mime_sniffer.kSniffableTags2",

277 arraysize(kSniffableTags));	277 arraysize(kSniffableTags));

278 // |pos| now points to first non-whitespace character (or at end).	278 // |pos| now points to first non-whitespace character (or at end).

279 return CheckForMagicNumbers(pos, end - pos,	279 return CheckForMagicNumbers(pos, end - pos,

280 kSniffableTags, arraysize(kSniffableTags),	280 kSniffableTags, arraysize(kSniffableTags),

281 &counter, result);	281 &counter, result);

282 }	282 }

283	283

284 static bool SniffForMagicNumbers(const char* content, size_t size,	284 static bool SniffForMagicNumbers(const char* content, size_t size,

285 std::string* result) {	285 std::string* result) {

286 // Check our big table of Magic Numbers	286 // Check our big table of Magic Numbers

287 static SnifferHistogram counter(L"mime_sniffer.kMagicNumbers2",	287 static SnifferHistogram counter("mime_sniffer.kMagicNumbers2",

288 arraysize(kMagicNumbers));	288 arraysize(kMagicNumbers));

289 return CheckForMagicNumbers(content, size,	289 return CheckForMagicNumbers(content, size,

290 kMagicNumbers, arraysize(kMagicNumbers),	290 kMagicNumbers, arraysize(kMagicNumbers),

291 &counter, result);	291 &counter, result);

292 }	292 }

293	293

294 // Byte order marks	294 // Byte order marks

295 static const MagicNumber kMagicXML[] = {	295 static const MagicNumber kMagicXML[] = {

296 // We want to be very conservative in interpreting text/xml content as	296 // We want to be very conservative in interpreting text/xml content as

297 // XHTML -- we just want to sniff enough to make unit tests pass.	297 // XHTML -- we just want to sniff enough to make unit tests pass.

(...skipping 15 matching lines...) Expand all Loading...
313 // We allow at most kFirstTagBytes bytes of content before we expect the	313 // We allow at most kFirstTagBytes bytes of content before we expect the

314 // opening tag.	314 // opening tag.

315 const size_t kFeedAllowedHeaderBytes = 300;	315 const size_t kFeedAllowedHeaderBytes = 300;

316 const char* const end = content + std::min(size, kFeedAllowedHeaderBytes);	316 const char* const end = content + std::min(size, kFeedAllowedHeaderBytes);

317 const char* pos = content;	317 const char* pos = content;

318	318

319 // This loop iterates through tag-looking offsets in the file.	319 // This loop iterates through tag-looking offsets in the file.

320 // We want to skip XML processing instructions (of the form "<?xml ...")	320 // We want to skip XML processing instructions (of the form "<?xml ...")

321 // and stop at the first "plain" tag, then make a decision on the mime-type	321 // and stop at the first "plain" tag, then make a decision on the mime-type

322 // based on the name (or possibly attributes) of that tag.	322 // based on the name (or possibly attributes) of that tag.

323 static SnifferHistogram counter(L"mime_sniffer.kMagicXML2",	323 static SnifferHistogram counter("mime_sniffer.kMagicXML2",

324 arraysize(kMagicXML));	324 arraysize(kMagicXML));

325 const int kMaxTagIterations = 5;	325 const int kMaxTagIterations = 5;

326 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {	326 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {

327 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));	327 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));

328 if (!pos)	328 if (!pos)

329 return false;	329 return false;

330	330

331 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {	331 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml")-1) == 0) {

332 // Skip XML declarations.	332 // Skip XML declarations.

333 ++pos;	333 ++pos;

(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF	380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF

381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF	381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF

382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF	382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF

383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF	383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF

384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF	384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF

385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF	385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF

386 };	386 };

387	387

388 static bool LooksBinary(const char* content, size_t size) {	388 static bool LooksBinary(const char* content, size_t size) {

389 // First, we look for a BOM.	389 // First, we look for a BOM.

390 static SnifferHistogram counter(L"mime_sniffer.kByteOrderMark2",	390 static SnifferHistogram counter("mime_sniffer.kByteOrderMark2",

391 arraysize(kByteOrderMark));	391 arraysize(kByteOrderMark));

392 std::string unused;	392 std::string unused;

393 if (CheckForMagicNumbers(content, size,	393 if (CheckForMagicNumbers(content, size,

394 kByteOrderMark, arraysize(kByteOrderMark),	394 kByteOrderMark, arraysize(kByteOrderMark),

395 &counter, &unused)) {	395 &counter, &unused)) {

396 // If there is BOM, we think the buffer is not binary.	396 // If there is BOM, we think the buffer is not binary.

397 return false;	397 return false;

398 }	398 }

399	399

400 // Next we look to see if any of the bytes "look binary."	400 // Next we look to see if any of the bytes "look binary."

(...skipping 13 matching lines...) Expand all Loading...
414 static const char* kUnknownMimeTypes[] = {	414 static const char* kUnknownMimeTypes[] = {

415 // Empty mime types are as unknown as they get.	415 // Empty mime types are as unknown as they get.

416 "",	416 "",

417 // The unknown/unknown type is popular and uninformative	417 // The unknown/unknown type is popular and uninformative

418 "unknown/unknown",	418 "unknown/unknown",

419 // The second most popular unknown mime type is application/unknown	419 // The second most popular unknown mime type is application/unknown

420 "application/unknown",	420 "application/unknown",

421 // Firefox rejects a mime type if it is exactly /	421 // Firefox rejects a mime type if it is exactly /

422 "/",	422 "/",

423 };	423 };

424 static SnifferHistogram counter(L"mime_sniffer.kUnknownMimeTypes2",	424 static SnifferHistogram counter("mime_sniffer.kUnknownMimeTypes2",

425 arraysize(kUnknownMimeTypes) + 1);	425 arraysize(kUnknownMimeTypes) + 1);

426 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {	426 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {

427 if (mime_type == kUnknownMimeTypes[i]) {	427 if (mime_type == kUnknownMimeTypes[i]) {

428 counter.Add(i);	428 counter.Add(i);

429 return true;	429 return true;

430 }	430 }

431 }	431 }

432 if (mime_type.find('/') == std::string::npos) {	432 if (mime_type.find('/') == std::string::npos) {

433 // Firefox rejects a mime type if it does not contain a slash	433 // Firefox rejects a mime type if it does not contain a slash

434 counter.Add(arraysize(kUnknownMimeTypes));	434 counter.Add(arraysize(kUnknownMimeTypes));

435 return true;	435 return true;

436 }	436 }

437 return false;	437 return false;

438 }	438 }

439	439

440 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {	440 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {

441 static SnifferHistogram should_sniff_counter(	441 static SnifferHistogram should_sniff_counter(

442 L"mime_sniffer.ShouldSniffMimeType2", 3);	442 "mime_sniffer.ShouldSniffMimeType2", 3);

443 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP	443 // We are willing to sniff the mime type for HTTP, HTTPS, and FTP

444 bool sniffable_scheme = url.is_empty() ||	444 bool sniffable_scheme = url.is_empty() ||

445 url.SchemeIs("http") ||	445 url.SchemeIs("http") ||

446 url.SchemeIs("https") ||	446 url.SchemeIs("https") ||

447 url.SchemeIs("ftp");	447 url.SchemeIs("ftp");

448 if (!sniffable_scheme) {	448 if (!sniffable_scheme) {

449 should_sniff_counter.Add(1);	449 should_sniff_counter.Add(1);

450 return false;	450 return false;

451 }	451 }

452	452

453 static const char* kSniffableTypes[] = {	453 static const char* kSniffableTypes[] = {

454 // Many web servers are misconfigured to send text/plain for many	454 // Many web servers are misconfigured to send text/plain for many

455 // different types of content.	455 // different types of content.

456 "text/plain",	456 "text/plain",

457 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml	457 // IIS 4.0 and 5.0 send application/octet-stream when serving .xhtml

458 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3,	458 // files. Firefox 2.0 does not sniff xhtml here, but Safari 3,

459 // Opera 9, and IE do.	459 // Opera 9, and IE do.

460 "application/octet-stream",	460 "application/octet-stream",

461 // XHTML and Atom/RSS feeds are often served as plain xml instead of	461 // XHTML and Atom/RSS feeds are often served as plain xml instead of

462 // their more specific mime types.	462 // their more specific mime types.

463 "text/xml",	463 "text/xml",

464 "application/xml",	464 "application/xml",

465 };	465 };

466 static SnifferHistogram counter(L"mime_sniffer.kSniffableTypes2",	466 static SnifferHistogram counter("mime_sniffer.kSniffableTypes2",

467 arraysize(kSniffableTypes) + 1);	467 arraysize(kSniffableTypes) + 1);

468 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {	468 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {

469 if (mime_type == kSniffableTypes[i]) {	469 if (mime_type == kSniffableTypes[i]) {

470 counter.Add(i);	470 counter.Add(i);

471 should_sniff_counter.Add(2);	471 should_sniff_counter.Add(2);

472 return true;	472 return true;

473 }	473 }

474 }	474 }

475 if (IsUnknownMimeType(mime_type)) {	475 if (IsUnknownMimeType(mime_type)) {

476 // The web server didn't specify a content type or specified a mime	476 // The web server didn't specify a content type or specified a mime

(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
548 result->assign("text/plain");	548 result->assign("text/plain");

549 // We could change our mind if a binary-looking byte appears later in	549 // We could change our mind if a binary-looking byte appears later in

550 // the content, so we only have enough content if we have the max.	550 // the content, so we only have enough content if we have the max.

551 return content_size >= kMaxBytesToSniff;	551 return content_size >= kMaxBytesToSniff;

552 }	552 }

553	553

554 return have_enough_content;	554 return have_enough_content;

555 }	555 }

556	556

557 } // namespace net	557 } // namespace net

558

OLD	NEW

« no previous file with comments | « net/base/file_stream_win.cc ('k') | net/base/sdch_filter.cc » ('j') | no next file with comments »