Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: net/base/mime_sniffer.cc

Issue 12703012: Have media gallery (through native media file util) use MIME sniffer (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Indentation fix. Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/mime_sniffer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Detecting mime types is a tricky business because we need to balance 5 // Detecting mime types is a tricky business because we need to balance
6 // compatibility concerns with security issues. Here is a survey of how other 6 // compatibility concerns with security issues. Here is a survey of how other
7 // browsers behave and then a description of how we intend to behave. 7 // browsers behave and then a description of how we intend to behave.
8 // 8 //
9 // HTML payload, no Content-Type header: 9 // HTML payload, no Content-Type header:
10 // * IE 7: Render as HTML 10 // * IE 7: Render as HTML
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
107 107
108 // The number of content bytes we need to use all our magic numbers. Feel free 108 // The number of content bytes we need to use all our magic numbers. Feel free
109 // to increase this number if you add a longer magic number. 109 // to increase this number if you add a longer magic number.
110 static const size_t kBytesRequiredForMagic = 42; 110 static const size_t kBytesRequiredForMagic = 42;
111 111
112 struct MagicNumber { 112 struct MagicNumber {
113 const char* mime_type; 113 const char* mime_type;
114 const char* magic; 114 const char* magic;
115 size_t magic_len; 115 size_t magic_len;
116 bool is_string; 116 bool is_string;
117 const char* mask; // if set, must have same length as |magic|
117 }; 118 };
118 119
119 #define MAGIC_NUMBER(mime_type, magic) \ 120 #define MAGIC_NUMBER(mime_type, magic) \
120 { (mime_type), (magic), sizeof(magic)-1, false }, 121 { (mime_type), (magic), sizeof(magic)-1, false, NULL },
122
123 template <int MagicSize, int MaskSize>
124 class VerifySizes {
125 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal);
126 public:
127 enum { SIZES = MagicSize };
128 };
129
130 #define verified_sizeof(magic, mask) \
131 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES
132
133 #define MAGIC_MASK(mime_type, magic, mask) \
134 { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) },
121 135
122 // Magic strings are case insensitive and must not include '\0' characters 136 // Magic strings are case insensitive and must not include '\0' characters
123 #define MAGIC_STRING(mime_type, magic) \ 137 #define MAGIC_STRING(mime_type, magic) \
124 { (mime_type), (magic), sizeof(magic)-1, true }, 138 { (mime_type), (magic), sizeof(magic)-1, true, NULL },
125 139
126 static const MagicNumber kMagicNumbers[] = { 140 static const MagicNumber kMagicNumbers[] = {
127 // Source: HTML 5 specification 141 // Source: HTML 5 specification
128 MAGIC_NUMBER("application/pdf", "%PDF-") 142 MAGIC_NUMBER("application/pdf", "%PDF-")
129 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") 143 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-")
130 MAGIC_NUMBER("image/gif", "GIF87a") 144 MAGIC_NUMBER("image/gif", "GIF87a")
131 MAGIC_NUMBER("image/gif", "GIF89a") 145 MAGIC_NUMBER("image/gif", "GIF89a")
132 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") 146 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A")
133 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") 147 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF")
134 MAGIC_NUMBER("image/bmp", "BM") 148 MAGIC_NUMBER("image/bmp", "BM")
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
169 // 183 //
170 // Cons: 184 // Cons:
171 // * These patterns are fairly weak 185 // * These patterns are fairly weak
172 // * If we mistakenly decide something is Flash, we will execute it 186 // * If we mistakenly decide something is Flash, we will execute it
173 // in the origin of an unsuspecting site. This could be a security 187 // in the origin of an unsuspecting site. This could be a security
174 // vulnerability if the site allows users to upload content. 188 // vulnerability if the site allows users to upload content.
175 // 189 //
176 // On balance, we do not include these patterns. 190 // On balance, we do not include these patterns.
177 }; 191 };
178 192
193 static const MagicNumber kExtraMagicNumbers[] = {
194 MAGIC_NUMBER("image/x-xbitmap", "#define")
195 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00")
196 MAGIC_NUMBER("image/svg+xml", "<?xml_version=")
197 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ")
198 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST")
199 MAGIC_NUMBER("audio/ogg", "OggS")
200 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0")
201 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0")
202 MAGIC_NUMBER("video/3gpp", "....ftyp3g")
203 MAGIC_NUMBER("video/3gpp", "....ftypavcl")
204 MAGIC_NUMBER("video/mp4", "....ftyp")
205 MAGIC_NUMBER("video/quicktime", "MOVI")
206 MAGIC_NUMBER("application/x-shockwave-flash", "CWS")
207 MAGIC_NUMBER("application/x-shockwave-flash", "FWS")
208 MAGIC_NUMBER("video/x-flv", "FLV")
209 };
210
179 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will 211 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will
180 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is 212 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is
181 // HTML, but we will not. 213 // HTML, but we will not.
182 214
183 #define MAGIC_HTML_TAG(tag) \ 215 #define MAGIC_HTML_TAG(tag) \
184 MAGIC_STRING("text/html", "<" tag) 216 MAGIC_STRING("text/html", "<" tag)
185 217
186 static const MagicNumber kSniffableTags[] = { 218 static const MagicNumber kSniffableTags[] = {
187 // XML processing directive. Although this is not an HTML mime type, we sniff 219 // XML processing directive. Although this is not an HTML mime type, we sniff
188 // for this in the HTML phase because text/xml is just as powerful as HTML and 220 // for this in the HTML phase because text/xml is just as powerful as HTML and
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
223 while (len) { 255 while (len) {
224 if ((*magic_entry != '.') && (*magic_entry != *content)) 256 if ((*magic_entry != '.') && (*magic_entry != *content))
225 return false; 257 return false;
226 ++magic_entry; 258 ++magic_entry;
227 ++content; 259 ++content;
228 --len; 260 --len;
229 } 261 }
230 return true; 262 return true;
231 } 263 }
232 264
233 static bool MatchMagicNumber(const char* content, size_t size, 265 // Like MagicCmp() except that it ANDs each byte with a mask before
266 // the comparison, because there are some bits we don't care about.
267 static bool MagicMaskCmp(const char* magic_entry,
268 const char* content,
269 size_t len,
270 const char* mask) {
271 while (len) {
272 if ((*magic_entry != '.') && (*magic_entry != (*mask & *content)))
273 return false;
274 ++magic_entry;
275 ++content;
276 ++mask;
277 --len;
278 }
279 return true;
280 }
281
282 static bool MatchMagicNumber(const char* content,
283 size_t size,
234 const MagicNumber* magic_entry, 284 const MagicNumber* magic_entry,
235 std::string* result) { 285 std::string* result) {
236 const size_t len = magic_entry->magic_len; 286 const size_t len = magic_entry->magic_len;
237 287
238 // Keep kBytesRequiredForMagic honest. 288 // Keep kBytesRequiredForMagic honest.
239 DCHECK_LE(len, kBytesRequiredForMagic); 289 DCHECK_LE(len, kBytesRequiredForMagic);
240 290
241 // To compare with magic strings, we need to compute strlen(content), but 291 // To compare with magic strings, we need to compute strlen(content), but
242 // content might not actually have a null terminator. In that case, we 292 // content might not actually have a null terminator. In that case, we
243 // pretend the length is content_size. 293 // pretend the length is content_size.
244 const char* end = 294 const char* end =
245 static_cast<const char*>(memchr(content, '\0', size)); 295 static_cast<const char*>(memchr(content, '\0', size));
246 const size_t content_strlen = 296 const size_t content_strlen =
247 (end != NULL) ? static_cast<size_t>(end - content) : size; 297 (end != NULL) ? static_cast<size_t>(end - content) : size;
248 298
249 bool match = false; 299 bool match = false;
250 if (magic_entry->is_string) { 300 if (magic_entry->is_string) {
251 if (content_strlen >= len) { 301 if (content_strlen >= len) {
252 // String comparisons are case-insensitive 302 // String comparisons are case-insensitive
253 match = (base::strncasecmp(magic_entry->magic, content, len) == 0); 303 match = (base::strncasecmp(magic_entry->magic, content, len) == 0);
254 } 304 }
255 } else { 305 } else {
256 if (size >= len) 306 if (size >= len) {
257 match = MagicCmp(magic_entry->magic, content, len); 307 if (!magic_entry->mask) {
308 match = MagicCmp(magic_entry->magic, content, len);
309 } else {
310 match = MagicMaskCmp(magic_entry->magic, content, len,
311 magic_entry->mask);
312 }
313 }
258 } 314 }
259 315
260 if (match) { 316 if (match) {
261 result->assign(magic_entry->mime_type); 317 result->assign(magic_entry->mime_type);
262 return true; 318 return true;
263 } 319 }
264 return false; 320 return false;
265 } 321 }
266 322
267 static bool CheckForMagicNumbers(const char* content, size_t size, 323 static bool CheckForMagicNumbers(const char* content, size_t size,
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after
605 // The web server didn't specify a content type or specified a mime 661 // The web server didn't specify a content type or specified a mime
606 // type that we ignore. 662 // type that we ignore.
607 counter->Add(arraysize(kSniffableTypes)); 663 counter->Add(arraysize(kSniffableTypes));
608 should_sniff_counter->Add(2); 664 should_sniff_counter->Add(2);
609 return true; 665 return true;
610 } 666 }
611 should_sniff_counter->Add(1); 667 should_sniff_counter->Add(1);
612 return false; 668 return false;
613 } 669 }
614 670
615 bool SniffMimeType(const char* content, size_t content_size, 671 bool SniffMimeType(const char* content,
616 const GURL& url, const std::string& type_hint, 672 size_t content_size,
673 const GURL& url,
674 const std::string& type_hint,
617 std::string* result) { 675 std::string* result) {
618 DCHECK_LT(content_size, 1000000U); // sanity check 676 DCHECK_LT(content_size, 1000000U); // sanity check
619 DCHECK(content); 677 DCHECK(content);
620 DCHECK(result); 678 DCHECK(result);
621 679
622 // By default, we assume we have enough content. 680 // By default, we assume we have enough content.
623 // Each sniff routine may unset this if it wasn't provided enough content. 681 // Each sniff routine may unset this if it wasn't provided enough content.
624 bool have_enough_content = true; 682 bool have_enough_content = true;
625 683
626 // By default, we'll return the type hint. 684 // By default, we'll return the type hint.
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
679 737
680 // Now we look in our large table of magic numbers to see if we can find 738 // Now we look in our large table of magic numbers to see if we can find
681 // anything that matches the content. 739 // anything that matches the content.
682 if (SniffForMagicNumbers(content, content_size, 740 if (SniffForMagicNumbers(content, content_size,
683 &have_enough_content, result)) 741 &have_enough_content, result))
684 return true; // We've matched a magic number. No more content needed. 742 return true; // We've matched a magic number. No more content needed.
685 743
686 return have_enough_content; 744 return have_enough_content;
687 } 745 }
688 746
747 bool SniffMimeTypeFromLocalData(const char* content,
748 size_t size,
749 std::string* result) {
750 // First check the extra table.
751 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers,
752 arraysize(kExtraMagicNumbers), NULL, result))
753 return true;
754 // Finally check the original table.
755 return CheckForMagicNumbers(content, size, kMagicNumbers,
756 arraysize(kMagicNumbers), NULL, result);
757 }
758
689 } // namespace net 759 } // namespace net
OLD | NEW
« no previous file with comments | « net/base/mime_sniffer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698