OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 96 matching lines...) | |
107 | 107 |
108 // The number of content bytes we need to use all our magic numbers. Feel free | 108 // The number of content bytes we need to use all our magic numbers. Feel free |
109 // to increase this number if you add a longer magic number. | 109 // to increase this number if you add a longer magic number. |
110 static const size_t kBytesRequiredForMagic = 42; | 110 static const size_t kBytesRequiredForMagic = 42; |
111 | 111 |
112 struct MagicNumber { | 112 struct MagicNumber { |
113 const char* mime_type; | 113 const char* mime_type; |
114 const char* magic; | 114 const char* magic; |
115 size_t magic_len; | 115 size_t magic_len; |
116 bool is_string; | 116 bool is_string; |
117 const char* mask; // if set, must have same length as |magic| | |
117 }; | 118 }; |
118 | 119 |
119 #define MAGIC_NUMBER(mime_type, magic) \ | 120 #define MAGIC_NUMBER(mime_type, magic) \ |
120 { (mime_type), (magic), sizeof(magic)-1, false }, | 121 { (mime_type), (magic), sizeof(magic)-1, false, NULL }, |
122 | |
123 #define MAGIC_MASK(mime_type, magic, mask) \ | |
124 { (mime_type), (magic), sizeof(magic)-1, false, (mask) }, | |
Ryan Sleevi
2013/05/03 19:03:24
Is there any sort of compiler trickery you can use
Kevin Bailey
2013/05/09 17:16:18
I verified that it catches mismatches. Let me know
| |
121 | 125 |
122 // Magic strings are case insensitive and must not include '\0' characters | 126 // Magic strings are case insensitive and must not include '\0' characters |
123 #define MAGIC_STRING(mime_type, magic) \ | 127 #define MAGIC_STRING(mime_type, magic) \ |
124 { (mime_type), (magic), sizeof(magic)-1, true }, | 128 { (mime_type), (magic), sizeof(magic)-1, true, NULL }, |
125 | 129 |
126 static const MagicNumber kMagicNumbers[] = { | 130 static const MagicNumber kMagicNumbers[] = { |
127 // Source: HTML 5 specification | 131 // Source: HTML 5 specification |
128 MAGIC_NUMBER("application/pdf", "%PDF-") | 132 MAGIC_NUMBER("application/pdf", "%PDF-") |
129 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") | 133 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
130 MAGIC_NUMBER("image/gif", "GIF87a") | 134 MAGIC_NUMBER("image/gif", "GIF87a") |
131 MAGIC_NUMBER("image/gif", "GIF89a") | 135 MAGIC_NUMBER("image/gif", "GIF89a") |
132 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") | 136 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A") |
133 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") | 137 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") |
134 MAGIC_NUMBER("image/bmp", "BM") | 138 MAGIC_NUMBER("image/bmp", "BM") |
(...skipping 34 matching lines...) | |
169 // | 173 // |
170 // Cons: | 174 // Cons: |
171 // * These patterns are fairly weak | 175 // * These patterns are fairly weak |
172 // * If we mistakenly decide something is Flash, we will execute it | 176 // * If we mistakenly decide something is Flash, we will execute it |
173 // in the origin of an unsuspecting site. This could be a security | 177 // in the origin of an unsuspecting site. This could be a security |
174 // vulnerability if the site allows users to upload content. | 178 // vulnerability if the site allows users to upload content. |
175 // | 179 // |
176 // On balance, we do not include these patterns. | 180 // On balance, we do not include these patterns. |
177 }; | 181 }; |
178 | 182 |
183 static const MagicNumber kExtraMagicNumbers[] = { | |
184 MAGIC_NUMBER("image/x-xbitmap", "#define") | |
185 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00") | |
186 MAGIC_NUMBER("image/svg+xml", "<?xml_version=") | |
187 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") | |
188 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST") | |
189 MAGIC_NUMBER("audio/ogg", "OggS") | |
190 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") | |
191 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") | |
192 MAGIC_NUMBER("video/3gpp", "....ftyp3g") | |
193 MAGIC_NUMBER("video/3gpp", "....ftypavcl") | |
194 MAGIC_NUMBER("video/mp4", "....ftyp") | |
195 MAGIC_NUMBER("video/quicktime", "MOVI") | |
196 MAGIC_NUMBER("application/x-shockwave-flash", "CWS") | |
197 MAGIC_NUMBER("application/x-shockwave-flash", "FWS") | |
198 MAGIC_NUMBER("video/x-flv", "FLV") | |
199 }; | |
200 | |
179 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will | 201 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will |
180 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is | 202 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is |
181 // HTML, but we will not. | 203 // HTML, but we will not. |
182 | 204 |
183 #define MAGIC_HTML_TAG(tag) \ | 205 #define MAGIC_HTML_TAG(tag) \ |
184 MAGIC_STRING("text/html", "<" tag) | 206 MAGIC_STRING("text/html", "<" tag) |
185 | 207 |
186 static const MagicNumber kSniffableTags[] = { | 208 static const MagicNumber kSniffableTags[] = { |
187 // XML processing directive. Although this is not an HTML mime type, we sniff | 209 // XML processing directive. Although this is not an HTML mime type, we sniff |
188 // for this in the HTML phase because text/xml is just as powerful as HTML and | 210 // for this in the HTML phase because text/xml is just as powerful as HTML and |
(...skipping 34 matching lines...) | |
223 while (len) { | 245 while (len) { |
224 if ((*magic_entry != '.') && (*magic_entry != *content)) | 246 if ((*magic_entry != '.') && (*magic_entry != *content)) |
225 return false; | 247 return false; |
226 ++magic_entry; | 248 ++magic_entry; |
227 ++content; | 249 ++content; |
228 --len; | 250 --len; |
229 } | 251 } |
230 return true; | 252 return true; |
231 } | 253 } |
232 | 254 |
255 // Like MagicCmp() except that it ANDs each byte with a mask before | |
256 // the comparison, because there are some bits we don't care about. | |
257 static bool MagicMaskCmp(const char* magic_entry, const char* content, | |
258 size_t len, const char* mask) { | |
Ryan Sleevi
2013/05/03 19:03:24
style nit (oh Chromium-dev centi-thread...)
stati
Kevin Bailey
2013/05/09 17:16:18
Done.
| |
259 while (len) { | |
260 if ((*magic_entry != '.') && (*magic_entry != (*mask & *content))) | |
261 return false; | |
262 ++magic_entry; | |
263 ++content; | |
264 ++mask; | |
265 --len; | |
266 } | |
267 return true; | |
268 } | |
269 | |
233 static bool MatchMagicNumber(const char* content, size_t size, | 270 static bool MatchMagicNumber(const char* content, size_t size, |
234 const MagicNumber* magic_entry, | 271 const MagicNumber* magic_entry, |
235 std::string* result) { | 272 std::string* result) { |
236 const size_t len = magic_entry->magic_len; | 273 const size_t len = magic_entry->magic_len; |
237 | 274 |
238 // Keep kBytesRequiredForMagic honest. | 275 // Keep kBytesRequiredForMagic honest. |
239 DCHECK_LE(len, kBytesRequiredForMagic); | 276 DCHECK_LE(len, kBytesRequiredForMagic); |
240 | 277 |
241 // To compare with magic strings, we need to compute strlen(content), but | 278 // To compare with magic strings, we need to compute strlen(content), but |
242 // content might not actually have a null terminator. In that case, we | 279 // content might not actually have a null terminator. In that case, we |
243 // pretend the length is content_size. | 280 // pretend the length is content_size. |
244 const char* end = | 281 const char* end = |
245 static_cast<const char*>(memchr(content, '\0', size)); | 282 static_cast<const char*>(memchr(content, '\0', size)); |
246 const size_t content_strlen = | 283 const size_t content_strlen = |
247 (end != NULL) ? static_cast<size_t>(end - content) : size; | 284 (end != NULL) ? static_cast<size_t>(end - content) : size; |
248 | 285 |
249 bool match = false; | 286 bool match = false; |
250 if (magic_entry->is_string) { | 287 if (magic_entry->is_string) { |
251 if (content_strlen >= len) { | 288 if (content_strlen >= len) { |
252 // String comparisons are case-insensitive | 289 // String comparisons are case-insensitive |
253 match = (base::strncasecmp(magic_entry->magic, content, len) == 0); | 290 match = (base::strncasecmp(magic_entry->magic, content, len) == 0); |
254 } | 291 } |
255 } else { | 292 } else { |
256 if (size >= len) | 293 if (size >= len) { |
257 match = MagicCmp(magic_entry->magic, content, len); | 294 if (!magic_entry->mask) { |
295 match = MagicCmp(magic_entry->magic, content, len); | |
296 } else { | |
297 match = MagicMaskCmp(magic_entry->magic, content, len, | |
298 magic_entry->mask); | |
Ryan Sleevi
2013/05/03 19:03:24
style nit:
match = MagicMaskCmp(magic_entry->magic
Kevin Bailey
2013/05/09 17:16:18
Done.
| |
299 } | |
300 } | |
258 } | 301 } |
259 | 302 |
260 if (match) { | 303 if (match) { |
261 result->assign(magic_entry->mime_type); | 304 result->assign(magic_entry->mime_type); |
262 return true; | 305 return true; |
263 } | 306 } |
264 return false; | 307 return false; |
265 } | 308 } |
266 | 309 |
267 static bool CheckForMagicNumbers(const char* content, size_t size, | 310 static bool CheckForMagicNumbers(const char* content, size_t size, |
(...skipping 411 matching lines...) | |
679 | 722 |
680 // Now we look in our large table of magic numbers to see if we can find | 723 // Now we look in our large table of magic numbers to see if we can find |
681 // anything that matches the content. | 724 // anything that matches the content. |
682 if (SniffForMagicNumbers(content, content_size, | 725 if (SniffForMagicNumbers(content, content_size, |
683 &have_enough_content, result)) | 726 &have_enough_content, result)) |
684 return true; // We've matched a magic number. No more content needed. | 727 return true; // We've matched a magic number. No more content needed. |
685 | 728 |
686 return have_enough_content; | 729 return have_enough_content; |
687 } | 730 } |
688 | 731 |
732 bool IdentifyExtraMimeType(const char* content, size_t size, | |
733 std::string* result) { | |
Ryan Sleevi
2013/05/03 19:03:24
style nit: one arg per line (and I'll throw in a c
Kevin Bailey
2013/05/09 17:16:18
Done, assuming that's what you meant.
| |
734 // First check the extra table. | |
735 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, | |
736 sizeof(kExtraMagicNumbers) / sizeof(MagicNumber), | |
737 NULL, result)) | |
738 return true; | |
739 // Finally check the original table. | |
740 return CheckForMagicNumbers(content, size, kMagicNumbers, | |
741 sizeof(kMagicNumbers) / sizeof(MagicNumber), | |
Ryan Sleevi
2013/05/03 19:03:24
Both of these calls should be using arraysize() fr
Kevin Bailey
2013/05/09 17:16:18
Done.
| |
742 NULL, result); | |
743 } | |
744 | |
689 } // namespace net | 745 } // namespace net |
OLD | NEW |