OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Detecting mime types is a tricky business because we need to balance | 5 // Detecting mime types is a tricky business because we need to balance |
6 // compatibility concerns with security issues. Here is a survey of how other | 6 // compatibility concerns with security issues. Here is a survey of how other |
7 // browsers behave and then a description of how we intend to behave. | 7 // browsers behave and then a description of how we intend to behave. |
8 // | 8 // |
9 // HTML payload, no Content-Type header: | 9 // HTML payload, no Content-Type header: |
10 // * IE 7: Render as HTML | 10 // * IE 7: Render as HTML |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
109 // to increase this number if you add a longer magic number. | 109 // to increase this number if you add a longer magic number. |
110 static const size_t kBytesRequiredForMagic = 42; | 110 static const size_t kBytesRequiredForMagic = 42; |
111 | 111 |
112 struct MagicNumber { | 112 struct MagicNumber { |
113 const char* mime_type; | 113 const char* mime_type; |
114 const char* magic; | 114 const char* magic; |
115 size_t magic_len; | 115 size_t magic_len; |
116 bool is_string; | 116 bool is_string; |
117 }; | 117 }; |
118 | 118 |
119 struct MagicMaskNumber { | |
vandebo (ex-Chrome)
2013/04/29 19:43:50
I think it's ok to modify MagicNumber. It's an im
Kevin Bailey
2013/04/30 20:57:56
Done.
| |
120 const char* mime_type; | |
121 const char* magic; | |
122 size_t magic_len; | |
123 bool is_string; | |
124 const char* mask; // must have same length as |magic| | |
125 }; | |
126 | |
119 #define MAGIC_NUMBER(mime_type, magic) \ | 127 #define MAGIC_NUMBER(mime_type, magic) \ |
120 { (mime_type), (magic), sizeof(magic)-1, false }, | 128 { (mime_type), (magic), sizeof(magic)-1, false }, |
121 | 129 |
130 #define MAGIC_MASK(mime_type, magic, mask) \ | |
131 { (mime_type), (magic), sizeof(magic)-1, false, (mask) }, | |
132 | |
122 // Magic strings are case insensitive and must not include '\0' characters | 133 // Magic strings are case insensitive and must not include '\0' characters |
123 #define MAGIC_STRING(mime_type, magic) \ | 134 #define MAGIC_STRING(mime_type, magic) \ |
124 { (mime_type), (magic), sizeof(magic)-1, true }, | 135 { (mime_type), (magic), sizeof(magic)-1, true }, |
125 | 136 |
126 static const MagicNumber kMagicNumbers[] = { | 137 static const MagicNumber kMagicNumbers[] = { |
127 // Source: HTML 5 specification | 138 // Source: HTML 5 specification |
128 MAGIC_NUMBER("application/pdf", "%PDF-") | 139 MAGIC_NUMBER("application/pdf", "%PDF-") |
129 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") | 140 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-") |
130 MAGIC_NUMBER("image/gif", "GIF87a") | 141 MAGIC_NUMBER("image/gif", "GIF87a") |
131 MAGIC_NUMBER("image/gif", "GIF89a") | 142 MAGIC_NUMBER("image/gif", "GIF89a") |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
169 // | 180 // |
170 // Cons: | 181 // Cons: |
171 // * These patterns are fairly weak | 182 // * These patterns are fairly weak |
172 // * If we mistakenly decide something is Flash, we will execute it | 183 // * If we mistakenly decide something is Flash, we will execute it |
173 // in the origin of an unsuspecting site. This could be a security | 184 // in the origin of an unsuspecting site. This could be a security |
174 // vulnerability if the site allows users to upload content. | 185 // vulnerability if the site allows users to upload content. |
175 // | 186 // |
176 // On balance, we do not include these patterns. | 187 // On balance, we do not include these patterns. |
177 }; | 188 }; |
178 | 189 |
190 static const MagicMaskNumber kMagicMaskNumbers[] = { | |
vandebo (ex-Chrome)
2013/04/29 19:43:50
These aren't all mask'd magic numbers... maybe kEx
Kevin Bailey
2013/04/30 20:57:56
Done.
| |
191 MAGIC_NUMBER("image/x-xbitmap", "#define") | |
192 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00") | |
193 MAGIC_NUMBER("image/svg+xml", "<?xml_version=") | |
194 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") | |
195 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST") | |
196 MAGIC_NUMBER("video/x-ms-wmv", "RIFF....AVI LIST") | |
197 MAGIC_NUMBER("audio/ogg", "OggS") | |
198 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0") | |
199 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") | |
200 MAGIC_NUMBER("video/3gpp", "....ftyp3g") | |
201 MAGIC_NUMBER("video/3gpp", "....ftypavcl") | |
202 MAGIC_NUMBER("video/mp4", "....ftyp") | |
203 MAGIC_NUMBER("video/quicktime", "MOVI") | |
204 MAGIC_NUMBER("application/x-shockwave-flash", "CWS") | |
205 MAGIC_NUMBER("application/x-shockwave-flash", "FWS") | |
206 MAGIC_NUMBER("video/x-flv", "FLV") | |
207 }; | |
208 | |
179 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will | 209 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will |
180 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is | 210 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is |
181 // HTML, but we will not. | 211 // HTML, but we will not. |
182 | 212 |
183 #define MAGIC_HTML_TAG(tag) \ | 213 #define MAGIC_HTML_TAG(tag) \ |
184 MAGIC_STRING("text/html", "<" tag) | 214 MAGIC_STRING("text/html", "<" tag) |
185 | 215 |
186 static const MagicNumber kSniffableTags[] = { | 216 static const MagicNumber kSniffableTags[] = { |
187 // XML processing directive. Although this is not an HTML mime type, we sniff | 217 // XML processing directive. Although this is not an HTML mime type, we sniff |
188 // for this in the HTML phase because text/xml is just as powerful as HTML and | 218 // for this in the HTML phase because text/xml is just as powerful as HTML and |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
257 match = MagicCmp(magic_entry->magic, content, len); | 287 match = MagicCmp(magic_entry->magic, content, len); |
258 } | 288 } |
259 | 289 |
260 if (match) { | 290 if (match) { |
261 result->assign(magic_entry->mime_type); | 291 result->assign(magic_entry->mime_type); |
262 return true; | 292 return true; |
263 } | 293 } |
264 return false; | 294 return false; |
265 } | 295 } |
266 | 296 |
297 // Like MagicCmp() except that it ANDs each byte of |content| with the | |
298 // matching byte of |mask| before comparing, to ignore bits we don't care about. | |
299 static bool MagicMaskCmp(const char* magic_entry, const char* mask, | |
300 const char* content, size_t len) { | |
301 while (len) { | |
302 if ((*magic_entry != '.') && (*magic_entry != (*mask & *content))) | |
303 return false; | |
304 ++magic_entry; | |
305 ++content; | |
306 ++mask; | |
307 --len; | |
308 } | |
309 return true; | |
310 } | |
311 | |
312 static bool MatchMagicMaskNumber(const char* content, size_t size, | |
313 const MagicMaskNumber* magic_entry, | |
314 std::string* result) { | |
315 const size_t len = magic_entry->magic_len; | |
316 | |
317 // Keep kBytesRequiredForMagic honest. | |
318 DCHECK_LE(len, kBytesRequiredForMagic); | |
319 | |
320 // To compare with magic strings, we need to compute strlen(content), but | |
321 // content might not actually have a null terminator. In that case, we | |
322 // pretend the length is |size|. | |
323 const char* end = | |
324 static_cast<const char*>(memchr(content, '\0', size)); | |
325 const size_t content_strlen = | |
326 (end != NULL) ? static_cast<size_t>(end - content) : size; | |
327 | |
328 bool match = false; | |
329 if (magic_entry->is_string) { | |
330 if (content_strlen >= len) { | |
331 // String comparisons are case-insensitive | |
332 match = (base::strncasecmp(magic_entry->magic, content, len) == 0); | |
333 } | |
334 } else { | |
335 if (size >= len) { | |
336 if (magic_entry->mask) { | |
337 match = MagicMaskCmp(magic_entry->magic, magic_entry->mask, content, | |
338 len); | |
339 } else { | |
340 match = MagicCmp(magic_entry->magic, content, len); | |
341 } | |
342 } | |
343 } | |
344 | |
345 if (match) { | |
346 result->assign(magic_entry->mime_type); | |
347 return true; | |
348 } | |
349 return false; | |
350 } | |
351 | |
267 static bool CheckForMagicNumbers(const char* content, size_t size, | 352 static bool CheckForMagicNumbers(const char* content, size_t size, |
268 const MagicNumber* magic, size_t magic_len, | 353 const MagicNumber* magic, size_t magic_len, |
269 base::HistogramBase* counter, | 354 base::HistogramBase* counter, |
270 std::string* result) { | 355 std::string* result) { |
271 for (size_t i = 0; i < magic_len; ++i) { | 356 for (size_t i = 0; i < magic_len; ++i) { |
272 if (MatchMagicNumber(content, size, &(magic[i]), result)) { | 357 if (MatchMagicNumber(content, size, &(magic[i]), result)) { |
273 if (counter) counter->Add(static_cast<int>(i)); | 358 if (counter) counter->Add(static_cast<int>(i)); |
274 return true; | 359 return true; |
275 } | 360 } |
276 } | 361 } |
277 return false; | 362 return false; |
278 } | 363 } |
279 | 364 |
365 static bool CheckForMagicMaskNumbers(const char* content, size_t size, | |
366 const MagicMaskNumber* magic, | |
367 size_t magic_len, | |
368 std::string* result) { | |
369 for (size_t i = 0; i < magic_len; ++i) { | |
370 if (MatchMagicMaskNumber(content, size, &(magic[i]), result)) | |
371 return true; | |
372 } | |
373 return false; | |
374 } | |
375 | |
376 bool IdentifyExtraMimeType(const char* content, size_t size, | |
377 std::string* result) { | |
378 // First check the extra table. | |
379 if (CheckForMagicMaskNumbers(content, size, kMagicMaskNumbers, | |
380 sizeof(kMagicMaskNumbers) / | |
381 sizeof(MagicMaskNumber), result)) | |
382 return true; | |
383 // Finally check the original table. | |
384 return CheckForMagicNumbers(content, size, kMagicNumbers, | |
385 sizeof(kMagicNumbers) / sizeof(MagicNumber), | |
386 NULL, result); | |
387 } | |
388 | |
280 // Truncates |size| to |max_size| and returns true if |size| is at least | 389 // Truncates |size| to |max_size| and returns true if |size| is at least |
281 // |max_size|. | 390 // |max_size|. |
282 static bool TruncateSize(const size_t max_size, size_t* size) { | 391 static bool TruncateSize(const size_t max_size, size_t* size) { |
283 // Keep kMaxBytesToSniff honest. | 392 // Keep kMaxBytesToSniff honest. |
284 DCHECK_LE(static_cast<int>(max_size), kMaxBytesToSniff); | 393 DCHECK_LE(static_cast<int>(max_size), kMaxBytesToSniff); |
285 | 394 |
286 if (*size >= max_size) { | 395 if (*size >= max_size) { |
287 *size = max_size; | 396 *size = max_size; |
288 return true; | 397 return true; |
289 } | 398 } |
(...skipping 390 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
680 // Now we look in our large table of magic numbers to see if we can find | 789 // Now we look in our large table of magic numbers to see if we can find |
681 // anything that matches the content. | 790 // anything that matches the content. |
682 if (SniffForMagicNumbers(content, content_size, | 791 if (SniffForMagicNumbers(content, content_size, |
683 &have_enough_content, result)) | 792 &have_enough_content, result)) |
684 return true; // We've matched a magic number. No more content needed. | 793 return true; // We've matched a magic number. No more content needed. |
685 | 794 |
686 return have_enough_content; | 795 return have_enough_content; |
687 } | 796 } |
688 | 797 |
689 } // namespace net | 798 } // namespace net |
OLD | NEW |