net/base/mime_sniffer.cc - Issue 266243004: Clang format slam.

Side by Side Diff: net/base/mime_sniffer.cc

Issue 266243004: Clang format slam. Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Detecting mime types is a tricky business because we need to balance	5 // Detecting mime types is a tricky business because we need to balance

6 // compatibility concerns with security issues. Here is a survey of how other	6 // compatibility concerns with security issues. Here is a survey of how other

7 // browsers behave and then a description of how we intend to behave.	7 // browsers behave and then a description of how we intend to behave.

8 //	8 //

9 // HTML payload, no Content-Type header:	9 // HTML payload, no Content-Type header:

10 // * IE 7: Render as HTML	10 // * IE 7: Render as HTML

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
110 static const size_t kBytesRequiredForMagic = 42;	110 static const size_t kBytesRequiredForMagic = 42;

111	111

112 struct MagicNumber {	112 struct MagicNumber {

113 const char* mime_type;	113 const char* mime_type;

114 const char* magic;	114 const char* magic;

115 size_t magic_len;	115 size_t magic_len;

116 bool is_string;	116 bool is_string;

117 const char* mask; // if set, must have same length as |magic|	117 const char* mask; // if set, must have same length as |magic|

118 };	118 };

119	119

120 #define MAGIC_NUMBER(mime_type, magic) \	120 #define MAGIC_NUMBER(mime_type, magic) \

121 { (mime_type), (magic), sizeof(magic)-1, false, NULL },	121 { (mime_type), (magic), sizeof(magic) - 1, false, NULL } \

	122 ,
	mmenke 2014/10/10 18:12:39 Hrm...That comma change is really weird.
122	123

123 template <int MagicSize, int MaskSize>	124 template <int MagicSize, int MaskSize>

124 class VerifySizes {	125 class VerifySizes {

125 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal);	126 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal);

	127

126 public:	128 public:

127 enum { SIZES = MagicSize };	129 enum { SIZES = MagicSize };

128 };	130 };

129	131

130 #define verified_sizeof(magic, mask) \	132 #define verified_sizeof(magic, mask) \

131 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES	133 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES

132	134

133 #define MAGIC_MASK(mime_type, magic, mask) \	135 #define MAGIC_MASK(mime_type, magic, mask) \

134 { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) },	136 { (mime_type), (magic), verified_sizeof(magic, mask) - 1, false, (mask) } \

	137 ,

135	138

136 // Magic strings are case insensitive and must not include '\0' characters	139 // Magic strings are case insensitive and must not include '\0' characters

137 #define MAGIC_STRING(mime_type, magic) \	140 #define MAGIC_STRING(mime_type, magic) \

138 { (mime_type), (magic), sizeof(magic)-1, true, NULL },	141 { (mime_type), (magic), sizeof(magic) - 1, true, NULL } \

	142 ,

139	143

140 static const MagicNumber kMagicNumbers[] = {	144 static const MagicNumber kMagicNumbers[] = {

141 // Source: HTML 5 specification	145 // Source: HTML 5 specification

142 MAGIC_NUMBER("application/pdf", "%PDF-")	146 MAGIC_NUMBER("application/pdf", "%PDF-")

143 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-")	147 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-")

144 MAGIC_NUMBER("image/gif", "GIF87a")	148 MAGIC_NUMBER("image/gif", "GIF87a") MAGIC_NUMBER("image/gif", "GIF89a")

145 MAGIC_NUMBER("image/gif", "GIF89a")	149 MAGIC_NUMBER("image/png",

146 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A")	150 "\x89"

147 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF")	151 "PNG\x0D\x0A\x1A\x0A")

148 MAGIC_NUMBER("image/bmp", "BM")	152 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF") MAGIC_NUMBER("image/bmp", "BM")

149 // Source: Mozilla	153 // Source: Mozilla

150 MAGIC_NUMBER("text/plain", "#!") // Script	154 MAGIC_NUMBER("text/plain", "#!") // Script

151 MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS	155 MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS

152 MAGIC_NUMBER("text/plain", "From")	156 MAGIC_NUMBER("text/plain", "From") MAGIC_NUMBER("text/plain", ">From")

153 MAGIC_NUMBER("text/plain", ">From")	157 // Chrome specific

154 // Chrome specific	158 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08")

155 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08")	159 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") MAGIC_NUMBER(

156 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46")	160 "video/x-ms-asf",

157 MAGIC_NUMBER("video/x-ms-asf",	161 "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C")

158 "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C")	162 MAGIC_NUMBER("image/tiff", "I I") MAGIC_NUMBER("image/tiff", "II*")

159 MAGIC_NUMBER("image/tiff", "I I")	163 MAGIC_NUMBER("image/tiff", "MM\x00*") MAGIC_NUMBER("audio/mpeg", "ID3")

160 MAGIC_NUMBER("image/tiff", "II*")	164 MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ")

161 MAGIC_NUMBER("image/tiff", "MM\x00*")	165 MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3")

162 MAGIC_NUMBER("audio/mpeg", "ID3")	166 // TODO(abarth): we don't handle partial byte matches yet

163 MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ")	167 // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB")

164 MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3")	168 // MAGIC_NUMBER("audio/mpeg", "\xFF\xE")

165 // TODO(abarth): we don't handle partial byte matches yet	169 // MAGIC_NUMBER("audio/mpeg", "\xFF\xF")

166 // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB")	170 MAGIC_NUMBER("application/zip", "PK\x03\x04")

167 // MAGIC_NUMBER("audio/mpeg", "\xFF\xE")	171 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00")

168 // MAGIC_NUMBER("audio/mpeg", "\xFF\xF")	172 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A")

169 MAGIC_NUMBER("application/zip", "PK\x03\x04")	173 MAGIC_NUMBER("application/octet-stream", "MZ") // EXE

170 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00")	174 // Sniffing for Flash:

171 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A")	175 //

172 MAGIC_NUMBER("application/octet-stream", "MZ") // EXE	176 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS")

173 // Sniffing for Flash:	177 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV")

174 //	178 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS")

175 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS")	179 //

176 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV")	180 // Including these magic number for Flash is a trade off.

177 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS")	181 //

178 //	182 // Pros:

179 // Including these magic number for Flash is a trade off.	183 // * Flash is an important and popular file format

180 //	184 //

181 // Pros:	185 // Cons:

182 // * Flash is an important and popular file format	186 // * These patterns are fairly weak

183 //	187 // * If we mistakenly decide something is Flash, we will execute it

184 // Cons:	188 // in the origin of an unsuspecting site. This could be a security

185 // * These patterns are fairly weak	189 // vulnerability if the site allows users to upload content.

186 // * If we mistakenly decide something is Flash, we will execute it	190 //

187 // in the origin of an unsuspecting site. This could be a security	191 // On balance, we do not include these patterns.

188 // vulnerability if the site allows users to upload content.

189 //

190 // On balance, we do not include these patterns.

191 };	192 };

192	193

193 // The number of content bytes we need to use all our Microsoft Office magic	194 // The number of content bytes we need to use all our Microsoft Office magic

194 // numbers.	195 // numbers.

195 static const size_t kBytesRequiredForOfficeMagic = 8;	196 static const size_t kBytesRequiredForOfficeMagic = 8;

196	197

197 static const MagicNumber kOfficeMagicNumbers[] = {	198 static const MagicNumber kOfficeMagicNumbers[] = {

198 MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")	199 MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")

199 MAGIC_NUMBER("OOXML", "PK\x03\x04")	200 MAGIC_NUMBER("OOXML", "PK\x03\x04")};
	mmenke 2014/10/10 18:12:39 Not putting the close brace on its own line seems like a pretty big change.
200 };

201	201

202 enum OfficeDocType {	202 enum OfficeDocType {

203 DOC_TYPE_WORD,	203 DOC_TYPE_WORD,

204 DOC_TYPE_EXCEL,	204 DOC_TYPE_EXCEL,

205 DOC_TYPE_POWERPOINT,	205 DOC_TYPE_POWERPOINT,

206 DOC_TYPE_NONE	206 DOC_TYPE_NONE

207 };	207 };

208	208

209 struct OfficeExtensionType {	209 struct OfficeExtensionType {

210 OfficeDocType doc_type;	210 OfficeDocType doc_type;

211 const char* extension;	211 const char* extension;

212 size_t extension_len;	212 size_t extension_len;

213 };	213 };

214	214

215 #define OFFICE_EXTENSION(type, extension) \	215 #define OFFICE_EXTENSION(type, extension) \

216 { (type), (extension), sizeof(extension) - 1 },	216 { (type), (extension), sizeof(extension) - 1 } \

	217 ,

217	218

218 static const OfficeExtensionType kOfficeExtensionTypes[] = {	219 static const OfficeExtensionType kOfficeExtensionTypes[] = {

219 OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc")	220 OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc")

220 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls")	221 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls")

221 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt")	222 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt")

222 OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx")	223 OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx")

223 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx")	224 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx")

224 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")	225 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")};

225 };

226	226

227 static const MagicNumber kExtraMagicNumbers[] = {	227 static const MagicNumber kExtraMagicNumbers[] = {

228 MAGIC_NUMBER("image/x-xbitmap", "#define")	228 MAGIC_NUMBER("image/x-xbitmap", "#define")

229 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00")	229 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00")

230 MAGIC_NUMBER("image/svg+xml", "<?xml_version=")	230 MAGIC_NUMBER("image/svg+xml", "<?xml_version=")

231 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ")	231 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ")

232 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST")	232 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST")

233 MAGIC_NUMBER("audio/ogg", "OggS")	233 MAGIC_NUMBER("audio/ogg", "OggS")

234 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0")	234 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0")

235 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0")	235 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0")

236 MAGIC_NUMBER("video/3gpp", "....ftyp3g")	236 MAGIC_NUMBER("video/3gpp", "....ftyp3g")

237 MAGIC_NUMBER("video/3gpp", "....ftypavcl")	237 MAGIC_NUMBER("video/3gpp", "....ftypavcl")

238 MAGIC_NUMBER("video/mp4", "....ftyp")	238 MAGIC_NUMBER("video/mp4", "....ftyp")

239 MAGIC_NUMBER("video/quicktime", "....moov")	239 MAGIC_NUMBER("video/quicktime", "....moov")

240 MAGIC_NUMBER("application/x-shockwave-flash", "CWS")	240 MAGIC_NUMBER("application/x-shockwave-flash", "CWS")

241 MAGIC_NUMBER("application/x-shockwave-flash", "FWS")	241 MAGIC_NUMBER("application/x-shockwave-flash", "FWS")

242 MAGIC_NUMBER("video/x-flv", "FLV")	242 MAGIC_NUMBER("video/x-flv", "FLV") MAGIC_NUMBER("audio/x-flac", "fLaC")

243 MAGIC_NUMBER("audio/x-flac", "fLaC")

244	243

245 // RAW image types.	244 // RAW image types.

246 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR")	245 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR")

247 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR")	246 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR")

248 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM")	247 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM")

249 MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian	248 MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian

250 MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian	249 MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian

251 MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian	250 MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian

252 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ")	251 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ")

253 MAGIC_NUMBER("image/x-panasonic-raw",	252 MAGIC_NUMBER("image/x-panasonic-raw",

254 "IIU\x00\x08\x00\x00\x00") // Panasonic .raw	253 "IIU\x00\x08\x00\x00\x00") // Panasonic .raw

255 MAGIC_NUMBER("image/x-panasonic-raw",	254 MAGIC_NUMBER("image/x-panasonic-raw",

256 "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2	255 "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2

257 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw")	256 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw")

258 MAGIC_NUMBER("image/x-x3f", "FOVb")	257 MAGIC_NUMBER("image/x-x3f", "FOVb")};

259 };

260	258

261 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will	259 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will

262 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is	260 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is

263 // HTML, but we will not.	261 // HTML, but we will not.

264	262

265 #define MAGIC_HTML_TAG(tag) \	263 #define MAGIC_HTML_TAG(tag) MAGIC_STRING("text/html", "<" tag)

266 MAGIC_STRING("text/html", "<" tag)

267	264

268 static const MagicNumber kSniffableTags[] = {	265 static const MagicNumber kSniffableTags[] = {

269 // XML processing directive. Although this is not an HTML mime type, we sniff	266 // XML processing directive. Although this is not an HTML mime type, we

270 // for this in the HTML phase because text/xml is just as powerful as HTML and	267 // sniff

271 // we want to leverage our white space skipping technology.	268 // for this in the HTML phase because text/xml is just as powerful as HTML

272 MAGIC_NUMBER("text/xml", "<?xml") // Mozilla	269 // and

273 // DOCTYPEs	270 // we want to leverage our white space skipping technology.

274 MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec	271 MAGIC_NUMBER("text/xml", "<?xml") // Mozilla

275 // Sniffable tags, ordered by how often they occur in sniffable documents.	272 // DOCTYPEs

276 MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla	273 MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec

277 MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla	274 // Sniffable tags, ordered by how often they occur in sniffable documents.

278 MAGIC_HTML_TAG("!--")	275 MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla

279 MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla	276 MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla

280 MAGIC_HTML_TAG("iframe") // Mozilla	277 MAGIC_HTML_TAG("!--") MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla

281 MAGIC_HTML_TAG("h1") // Mozilla	278 MAGIC_HTML_TAG("iframe") // Mozilla

282 MAGIC_HTML_TAG("div") // Mozilla	279 MAGIC_HTML_TAG("h1") // Mozilla

283 MAGIC_HTML_TAG("font") // Mozilla	280 MAGIC_HTML_TAG("div") // Mozilla

284 MAGIC_HTML_TAG("table") // Mozilla	281 MAGIC_HTML_TAG("font") // Mozilla

285 MAGIC_HTML_TAG("a") // Mozilla	282 MAGIC_HTML_TAG("table") // Mozilla

286 MAGIC_HTML_TAG("style") // Mozilla	283 MAGIC_HTML_TAG("a") // Mozilla

287 MAGIC_HTML_TAG("title") // Mozilla	284 MAGIC_HTML_TAG("style") // Mozilla

288 MAGIC_HTML_TAG("b") // Mozilla	285 MAGIC_HTML_TAG("title") // Mozilla

289 MAGIC_HTML_TAG("body") // Mozilla	286 MAGIC_HTML_TAG("b") // Mozilla

290 MAGIC_HTML_TAG("br")	287 MAGIC_HTML_TAG("body") // Mozilla

291 MAGIC_HTML_TAG("p") // Mozilla	288 MAGIC_HTML_TAG("br") MAGIC_HTML_TAG("p") // Mozilla

292 };	289 };

293	290

294 static base::HistogramBase* UMASnifferHistogramGet(const char* name,	291 static base::HistogramBase* UMASnifferHistogramGet(const char* name,

295 int array_size) {	292 int array_size) {

296 base::HistogramBase* counter =	293 base::HistogramBase* counter = base::LinearHistogram::FactoryGet(

297 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size,	294 name,

298 base::HistogramBase::kUmaTargetedHistogramFlag);	295 1,

	296 array_size - 1,

	297 array_size,

	298 base::HistogramBase::kUmaTargetedHistogramFlag);

299 return counter;	299 return counter;

300 }	300 }

301	301

302 // Compare content header to a magic number where magic_entry can contain '.'	302 // Compare content header to a magic number where magic_entry can contain '.'

303 // for single character of anything, allowing some bytes to be skipped.	303 // for single character of anything, allowing some bytes to be skipped.

304 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {	304 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {

305 while (len) {	305 while (len) {

306 if ((*magic_entry != '.') && (*magic_entry != *content))	306 if ((*magic_entry != '.') && (*magic_entry != *content))

307 return false;	307 return false;

308 ++magic_entry;	308 ++magic_entry;

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
361 }	361 }

362 }	362 }

363	363

364 if (match) {	364 if (match) {

365 result->assign(magic_entry.mime_type);	365 result->assign(magic_entry.mime_type);

366 return true;	366 return true;

367 }	367 }

368 return false;	368 return false;

369 }	369 }

370	370

371 static bool CheckForMagicNumbers(const char* content, size_t size,	371 static bool CheckForMagicNumbers(const char* content,

372 const MagicNumber* magic, size_t magic_len,	372 size_t size,

	373 const MagicNumber* magic,

	374 size_t magic_len,

373 base::HistogramBase* counter,	375 base::HistogramBase* counter,

374 std::string* result) {	376 std::string* result) {

375 for (size_t i = 0; i < magic_len; ++i) {	377 for (size_t i = 0; i < magic_len; ++i) {

376 if (MatchMagicNumber(content, size, magic[i], result)) {	378 if (MatchMagicNumber(content, size, magic[i], result)) {

377 if (counter) counter->Add(static_cast<int>(i));	379 if (counter)

	380 counter->Add(static_cast<int>(i));

378 return true;	381 return true;

379 }	382 }

380 }	383 }

381 return false;	384 return false;

382 }	385 }

383	386

384 // Truncates |size| to |max_size| and returns true if |size| is at least	387 // Truncates |size| to |max_size| and returns true if |size| is at least

385 // |max_size|.	388 // |max_size|.

386 static bool TruncateSize(const size_t max_size, size_t* size) {	389 static bool TruncateSize(const size_t max_size, size_t* size) {

387 // Keep kMaxBytesToSniff honest.	390 // Keep kMaxBytesToSniff honest.

(...skipping 23 matching lines...) Expand all Loading...
411 for (pos = content; pos < end; ++pos) {	414 for (pos = content; pos < end; ++pos) {

412 if (!IsAsciiWhitespace(*pos))	415 if (!IsAsciiWhitespace(*pos))

413 break;	416 break;

414 }	417 }

415 static base::HistogramBase* counter(NULL);	418 static base::HistogramBase* counter(NULL);

416 if (!counter) {	419 if (!counter) {

417 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",	420 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",

418 arraysize(kSniffableTags));	421 arraysize(kSniffableTags));

419 }	422 }

420 // |pos| now points to first non-whitespace character (or at end).	423 // |pos| now points to first non-whitespace character (or at end).

421 return CheckForMagicNumbers(pos, end - pos,	424 return CheckForMagicNumbers(pos,

422 kSniffableTags, arraysize(kSniffableTags),	425 end - pos,

423 counter, result);	426 kSniffableTags,

	427 arraysize(kSniffableTags),

	428 counter,

	429 result);

424 }	430 }

425	431

426 // Returns true and sets result if the content matches any of kMagicNumbers.	432 // Returns true and sets result if the content matches any of kMagicNumbers.

427 // Clears have_enough_content if more data could possibly change the result.	433 // Clears have_enough_content if more data could possibly change the result.

428 static bool SniffForMagicNumbers(const char* content,	434 static bool SniffForMagicNumbers(const char* content,

429 size_t size,	435 size_t size,

430 bool* have_enough_content,	436 bool* have_enough_content,

431 std::string* result) {	437 std::string* result) {

432 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);	438 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);

433	439

434 // Check our big table of Magic Numbers	440 // Check our big table of Magic Numbers

435 static base::HistogramBase* counter(NULL);	441 static base::HistogramBase* counter(NULL);

436 if (!counter) {	442 if (!counter) {

437 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",	443 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",

438 arraysize(kMagicNumbers));	444 arraysize(kMagicNumbers));

439 }	445 }

440 return CheckForMagicNumbers(content, size,	446 return CheckForMagicNumbers(

441 kMagicNumbers, arraysize(kMagicNumbers),	447 content, size, kMagicNumbers, arraysize(kMagicNumbers), counter, result);

442 counter, result);

443 }	448 }

444	449

445 // Returns true and sets result if the content matches any of	450 // Returns true and sets result if the content matches any of

446 // kOfficeMagicNumbers, and the URL has the proper extension.	451 // kOfficeMagicNumbers, and the URL has the proper extension.

447 // Clears |have_enough_content| if more data could possibly change the result.	452 // Clears |have_enough_content| if more data could possibly change the result.

448 static bool SniffForOfficeDocs(const char* content,	453 static bool SniffForOfficeDocs(const char* content,

449 size_t size,	454 size_t size,

450 const GURL& url,	455 const GURL& url,

451 bool* have_enough_content,	456 bool* have_enough_content,

452 std::string* result) {	457 std::string* result) {

453 *have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size);	458 *have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size);

454	459

455 // Check our table of magic numbers for Office file types.	460 // Check our table of magic numbers for Office file types.

456 std::string office_version;	461 std::string office_version;

457 if (!CheckForMagicNumbers(content, size,	462 if (!CheckForMagicNumbers(content,

458 kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers),	463 size,

459 NULL, &office_version))	464 kOfficeMagicNumbers,

	465 arraysize(kOfficeMagicNumbers),

	466 NULL,

	467 &office_version))

460 return false;	468 return false;

461	469

462 OfficeDocType type = DOC_TYPE_NONE;	470 OfficeDocType type = DOC_TYPE_NONE;

463 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) {	471 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) {

464 std::string url_path = url.path();	472 std::string url_path = url.path();

465	473

466 if (url_path.length() < kOfficeExtensionTypes[i].extension_len)	474 if (url_path.length() < kOfficeExtensionTypes[i].extension_len)

467 continue;	475 continue;

468	476

469 const char* extension =	477 const char* extension =

470 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len];	478 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len];

471	479

472 if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension,	480 if (0 == base::strncasecmp(extension,

	481 kOfficeExtensionTypes[i].extension,

473 kOfficeExtensionTypes[i].extension_len)) {	482 kOfficeExtensionTypes[i].extension_len)) {

474 type = kOfficeExtensionTypes[i].doc_type;	483 type = kOfficeExtensionTypes[i].doc_type;

475 break;	484 break;

476 }	485 }

477 }	486 }

478	487

479 if (type == DOC_TYPE_NONE)	488 if (type == DOC_TYPE_NONE)

480 return false;	489 return false;

481	490

482 if (office_version == "CFB") {	491 if (office_version == "CFB") {

483 switch (type) {	492 switch (type) {

484 case DOC_TYPE_WORD:	493 case DOC_TYPE_WORD:

485 *result = "application/msword";	494 *result = "application/msword";

486 return true;	495 return true;

487 case DOC_TYPE_EXCEL:	496 case DOC_TYPE_EXCEL:

488 *result = "application/vnd.ms-excel";	497 *result = "application/vnd.ms-excel";

489 return true;	498 return true;

490 case DOC_TYPE_POWERPOINT:	499 case DOC_TYPE_POWERPOINT:

491 *result = "application/vnd.ms-powerpoint";	500 *result = "application/vnd.ms-powerpoint";

492 return true;	501 return true;

493 case DOC_TYPE_NONE:	502 case DOC_TYPE_NONE:

494 NOTREACHED();	503 NOTREACHED();

495 return false;	504 return false;

496 }	505 }

497 } else if (office_version == "OOXML") {	506 } else if (office_version == "OOXML") {

498 switch (type) {	507 switch (type) {

499 case DOC_TYPE_WORD:	508 case DOC_TYPE_WORD:

500 *result = "application/vnd.openxmlformats-officedocument."	509 *result =

501 "wordprocessingml.document";	510 "application/vnd.openxmlformats-officedocument."

	511 "wordprocessingml.document";

502 return true;	512 return true;

503 case DOC_TYPE_EXCEL:	513 case DOC_TYPE_EXCEL:

504 *result = "application/vnd.openxmlformats-officedocument."	514 *result =

505 "spreadsheetml.sheet";	515 "application/vnd.openxmlformats-officedocument."

	516 "spreadsheetml.sheet";

506 return true;	517 return true;

507 case DOC_TYPE_POWERPOINT:	518 case DOC_TYPE_POWERPOINT:

508 *result = "application/vnd.openxmlformats-officedocument."	519 *result =

509 "presentationml.presentation";	520 "application/vnd.openxmlformats-officedocument."

	521 "presentationml.presentation";

510 return true;	522 return true;

511 case DOC_TYPE_NONE:	523 case DOC_TYPE_NONE:

512 NOTREACHED();	524 NOTREACHED();

513 return false;	525 return false;

514 }	526 }

515 }	527 }

516	528

517 NOTREACHED();	529 NOTREACHED();

518 return false;	530 return false;

519 }	531 }

520	532

521 static bool IsOfficeType(const std::string& type_hint) {	533 static bool IsOfficeType(const std::string& type_hint) {

522 return (type_hint == "application/msword" ||	534 return (type_hint == "application/msword" ||

523 type_hint == "application/vnd.ms-excel" ||	535 type_hint == "application/vnd.ms-excel" ||

524 type_hint == "application/vnd.ms-powerpoint" ||	536 type_hint == "application/vnd.ms-powerpoint" ||

525 type_hint == "application/vnd.openxmlformats-officedocument."	537 type_hint ==

526 "wordprocessingml.document" ||	538 "application/vnd.openxmlformats-officedocument."

527 type_hint == "application/vnd.openxmlformats-officedocument."	539 "wordprocessingml.document" ||

528 "spreadsheetml.sheet" ||	540 type_hint ==

529 type_hint == "application/vnd.openxmlformats-officedocument."	541 "application/vnd.openxmlformats-officedocument."

530 "presentationml.presentation" ||	542 "spreadsheetml.sheet" ||

	543 type_hint ==

	544 "application/vnd.openxmlformats-officedocument."

	545 "presentationml.presentation" ||

531 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" ||	546 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" ||

532 type_hint == "application/vnd.ms-word.document.macroenabled.12" ||	547 type_hint == "application/vnd.ms-word.document.macroenabled.12" ||

533 type_hint == "application/vnd.ms-powerpoint.presentation."	548 type_hint ==

534 "macroenabled.12" ||	549 "application/vnd.ms-powerpoint.presentation."

	550 "macroenabled.12" ||

535 type_hint == "application/mspowerpoint" ||	551 type_hint == "application/mspowerpoint" ||

536 type_hint == "application/msexcel" ||	552 type_hint == "application/msexcel" ||

537 type_hint == "application/vnd.ms-word" ||	553 type_hint == "application/vnd.ms-word" ||

538 type_hint == "application/vnd.ms-word.document.12" ||	554 type_hint == "application/vnd.ms-word.document.12" ||

539 type_hint == "application/vnd.msword");	555 type_hint == "application/vnd.msword");

540 }	556 }

541	557

542 // This function checks for files that have a Microsoft Office MIME type	558 // This function checks for files that have a Microsoft Office MIME type

543 // set, but are not actually Office files.	559 // set, but are not actually Office files.

544 //	560 //

545 // If this is not actually an Office file, |*result| is set to	561 // If this is not actually an Office file, |*result| is set to

546 // "application/octet-stream", otherwise it is not modified.	562 // "application/octet-stream", otherwise it is not modified.

547 //	563 //

548 // Returns false if additional data is required to determine the file type, or	564 // Returns false if additional data is required to determine the file type, or

549 // true if there is enough data to make a decision.	565 // true if there is enough data to make a decision.

550 static bool SniffForInvalidOfficeDocs(const char* content,	566 static bool SniffForInvalidOfficeDocs(const char* content,

551 size_t size,	567 size_t size,

552 const GURL& url,	568 const GURL& url,

553 std::string* result) {	569 std::string* result) {

554 if (!TruncateSize(kBytesRequiredForOfficeMagic, &size))	570 if (!TruncateSize(kBytesRequiredForOfficeMagic, &size))

555 return false;	571 return false;

556	572

557 // Check our table of magic numbers for Office file types. If it does not	573 // Check our table of magic numbers for Office file types. If it does not

558 // match one, the MIME type was invalid. Set it instead to a safe value.	574 // match one, the MIME type was invalid. Set it instead to a safe value.

559 std::string office_version;	575 std::string office_version;

560 if (!CheckForMagicNumbers(content, size,	576 if (!CheckForMagicNumbers(content,

561 kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers),	577 size,

562 NULL, &office_version)) {	578 kOfficeMagicNumbers,

	579 arraysize(kOfficeMagicNumbers),

	580 NULL,

	581 &office_version)) {

563 *result = "application/octet-stream";	582 *result = "application/octet-stream";

564 }	583 }

565	584

566 // We have enough information to determine if this was a Microsoft Office	585 // We have enough information to determine if this was a Microsoft Office

567 // document or not, so sniffing is completed.	586 // document or not, so sniffing is completed.

568 return true;	587 return true;

569 }	588 }

570	589

571 // Byte order marks	590 // Byte order marks

572 static const MagicNumber kMagicXML[] = {	591 static const MagicNumber kMagicXML[] = {

573 // We want to be very conservative in interpreting text/xml content as	592 // We want to be very conservative in interpreting text/xml content as

574 // XHTML -- we just want to sniff enough to make unit tests pass.	593 // XHTML -- we just want to sniff enough to make unit tests pass.

575 // So we match explicitly on this, and don't match other ways of writing	594 // So we match explicitly on this, and don't match other ways of writing

576 // it in semantically-equivalent ways.	595 // it in semantically-equivalent ways.

577 MAGIC_STRING("application/xhtml+xml",	596 MAGIC_STRING("application/xhtml+xml",

578 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")	597 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")

579 MAGIC_STRING("application/atom+xml", "<feed")	598 MAGIC_STRING("application/atom+xml", "<feed")

580 MAGIC_STRING("application/rss+xml", "<rss") // UTF-8	599 MAGIC_STRING("application/rss+xml", "<rss") // UTF-8

581 };	600 };

582	601

583 // Returns true and sets result if the content appears to contain XHTML or a	602 // Returns true and sets result if the content appears to contain XHTML or a

584 // feed.	603 // feed.

585 // Clears have_enough_content if more data could possibly change the result.	604 // Clears have_enough_content if more data could possibly change the result.

586 //	605 //

587 // TODO(evanm): this is similar but more conservative than what Safari does,	606 // TODO(evanm): this is similar but more conservative than what Safari does,

588 // while HTML5 has a different recommendation -- what should we do?	607 // while HTML5 has a different recommendation -- what should we do?

589 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset	608 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset

590 // of ASCII -- do we care?	609 // of ASCII -- do we care?

591 static bool SniffXML(const char* content,	610 static bool SniffXML(const char* content,

592 size_t size,	611 size_t size,

593 bool* have_enough_content,	612 bool* have_enough_content,

594 std::string* result) {	613 std::string* result) {

595 // We allow at most 300 bytes of content before we expect the opening tag.	614 // We allow at most 300 bytes of content before we expect the opening tag.

596 *have_enough_content &= TruncateSize(300, &size);	615 *have_enough_content &= TruncateSize(300, &size);

597 const char* pos = content;	616 const char* pos = content;

598 const char* const end = content + size;	617 const char* const end = content + size;

599	618

600 // This loop iterates through tag-looking offsets in the file.	619 // This loop iterates through tag-looking offsets in the file.

601 // We want to skip XML processing instructions (of the form "<?xml ...")	620 // We want to skip XML processing instructions (of the form "<?xml ...")

602 // and stop at the first "plain" tag, then make a decision on the mime-type	621 // and stop at the first "plain" tag, then make a decision on the mime-type

603 // based on the name (or possibly attributes) of that tag.	622 // based on the name (or possibly attributes) of that tag.

604 static base::HistogramBase* counter(NULL);	623 static base::HistogramBase* counter(NULL);

605 if (!counter) {	624 if (!counter) {

606 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2",	625 counter =

607 arraysize(kMagicXML));	626 UMASnifferHistogramGet("mime_sniffer.kMagicXML2", arraysize(kMagicXML));

608 }	627 }

609 const int kMaxTagIterations = 5;	628 const int kMaxTagIterations = 5;

610 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {	629 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {

611 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));	630 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));

612 if (!pos)	631 if (!pos)

613 return false;	632 return false;

614	633

615 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0) {	634 if (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0) {

616 // Skip XML declarations.	635 // Skip XML declarations.

617 ++pos;	636 ++pos;

618 continue;	637 continue;

619 } else if (base::strncasecmp(pos, "<!DOCTYPE",	638 } else if (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) ==

620 sizeof("<!DOCTYPE") - 1) == 0) {	639 0) {
	mmenke 2014/10/10 18:12:39 Think this is pretty ugly - I find no extra indent in cases like these, particularly when there's both an == and \|\| in the if body, very confusing.
621 // Skip DOCTYPE declarations.	640 // Skip DOCTYPE declarations.

622 ++pos;	641 ++pos;

623 continue;	642 continue;

624 }	643 }

625	644

626 if (CheckForMagicNumbers(pos, end - pos,	645 if (CheckForMagicNumbers(

627 kMagicXML, arraysize(kMagicXML),	646 pos, end - pos, kMagicXML, arraysize(kMagicXML), counter, result))

628 counter, result))

629 return true;	647 return true;
	mmenke 2014/10/10 18:12:39 This is a style violation - when an if body takes up multiple lines, there should be braces. Or at least that was the consensus on Chromium dev.
630	648

631 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult	649 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult

632 // to identify.	650 // to identify.

633	651

634 // If we get here, we've hit an initial tag that hasn't matched one of the	652 // If we get here, we've hit an initial tag that hasn't matched one of the

635 // above tests. Abort.	653 // above tests. Abort.

636 return true;	654 return true;

637 }	655 }

638	656

639 // We iterated too far without finding a start tag.	657 // We iterated too far without finding a start tag.

640 // If we have more content to look at, we aren't going to change our mind by	658 // If we have more content to look at, we aren't going to change our mind by

641 // seeing more bytes from the network.	659 // seeing more bytes from the network.

642 return pos < end;	660 return pos < end;

643 }	661 }

644	662

645 // Byte order marks	663 // Byte order marks

646 static const MagicNumber kByteOrderMark[] = {	664 static const MagicNumber kByteOrderMark[] = {

647 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE	665 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE

648 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE	666 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE

649 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8	667 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8

650 };	668 };

651	669

652 // Whether a given byte looks like it might be part of binary content.	670 // Whether a given byte looks like it might be part of binary content.

653 // Source: HTML5 spec	671 // Source: HTML5 spec

654 static char kByteLooksBinary[] = {	672 static char kByteLooksBinary[] = {

655 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F	673 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F

656 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F	674 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F

657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F	675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F

658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F	676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F

659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F	677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F

660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F	678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F

661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F	679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F

662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F	680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F

663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F	681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F

664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F	682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F

665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF	683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF

666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF	684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF

667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF	685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF

668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF	686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF

669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF	687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF

670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF	688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF

671 };	689 };

672	690

673 // Returns true and sets result to "application/octet-stream" if the content	691 // Returns true and sets result to "application/octet-stream" if the content

674 // appears to be binary data. Otherwise, returns false and sets "text/plain".	692 // appears to be binary data. Otherwise, returns false and sets "text/plain".

675 // Clears have_enough_content if more data could possibly change the result.	693 // Clears have_enough_content if more data could possibly change the result.

676 static bool SniffBinary(const char* content,	694 static bool SniffBinary(const char* content,

677 size_t size,	695 size_t size,

678 bool* have_enough_content,	696 bool* have_enough_content,

679 std::string* result) {	697 std::string* result) {

680 // There is no consensus about exactly how to sniff for binary content.	698 // There is no consensus about exactly how to sniff for binary content.

681 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.	699 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.

682 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.	700 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.

683 // Here, we side with FF, but with a smaller buffer. This size was chosen	701 // Here, we side with FF, but with a smaller buffer. This size was chosen

684 // because it is small enough to comfortably fit into a single packet (after	702 // because it is small enough to comfortably fit into a single packet (after

685 // allowing for headers) and yet large enough to account for binary formats	703 // allowing for headers) and yet large enough to account for binary formats

686 // that have a significant amount of ASCII at the beginning (crbug.com/15314).	704 // that have a significant amount of ASCII at the beginning (crbug.com/15314).

687 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);	705 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);

688	706

689 // First, we look for a BOM.	707 // First, we look for a BOM.

690 static base::HistogramBase* counter(NULL);	708 static base::HistogramBase* counter(NULL);

691 if (!counter) {	709 if (!counter) {

692 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",	710 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",

693 arraysize(kByteOrderMark));	711 arraysize(kByteOrderMark));

694 }	712 }

695 std::string unused;	713 std::string unused;

696 if (CheckForMagicNumbers(content, size,	714 if (CheckForMagicNumbers(content,

697 kByteOrderMark, arraysize(kByteOrderMark),	715 size,

698 counter, &unused)) {	716 kByteOrderMark,

	717 arraysize(kByteOrderMark),

	718 counter,

	719 &unused)) {

699 // If there is BOM, we think the buffer is not binary.	720 // If there is BOM, we think the buffer is not binary.

700 result->assign("text/plain");	721 result->assign("text/plain");

701 return false;	722 return false;

702 }	723 }

703	724

704 // Next we look to see if any of the bytes "look binary."	725 // Next we look to see if any of the bytes "look binary."

705 for (size_t i = 0; i < size; ++i) {	726 for (size_t i = 0; i < size; ++i) {

706 // If we see a binary-looking byte, we think the content is binary.	727 // If we see a binary-looking byte, we think the content is binary.

707 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {	728 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {

708 result->assign("application/octet-stream");	729 result->assign("application/octet-stream");

709 return true;	730 return true;

710 }	731 }

711 }	732 }

712	733

713 // No evidence either way. Default to non-binary and, if truncated, clear	734 // No evidence either way. Default to non-binary and, if truncated, clear

714 // have_enough_content because there could be a binary looking byte in the	735 // have_enough_content because there could be a binary looking byte in the

715 // truncated data.	736 // truncated data.

716 *have_enough_content &= is_truncated;	737 *have_enough_content &= is_truncated;

717 result->assign("text/plain");	738 result->assign("text/plain");

718 return false;	739 return false;

719 }	740 }

720	741

721 static bool IsUnknownMimeType(const std::string& mime_type) {	742 static bool IsUnknownMimeType(const std::string& mime_type) {

722 // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here.	743 // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here.

723 // If we do, please be careful not to alter the semantics at all.	744 // If we do, please be careful not to alter the semantics at all.

724 static const char* kUnknownMimeTypes[] = {	745 static const char* kUnknownMimeTypes[] = {

725 // Empty mime types are as unknown as they get.	746 // Empty mime types are as unknown as they get.

726 "",	747 "",

727 // The unknown/unknown type is popular and uninformative	748 // The unknown/unknown type is popular and uninformative

728 "unknown/unknown",	749 "unknown/unknown",

729 // The second most popular unknown mime type is application/unknown	750 // The second most popular unknown mime type is application/unknown

730 "application/unknown",	751 "application/unknown",

731 // Firefox rejects a mime type if it is exactly /	752 // Firefox rejects a mime type if it is exactly /

732 "/",	753 "/",

733 };	754 };

734 static base::HistogramBase* counter(NULL);	755 static base::HistogramBase* counter(NULL);

735 if (!counter) {	756 if (!counter) {

736 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",	757 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",

737 arraysize(kUnknownMimeTypes) + 1);	758 arraysize(kUnknownMimeTypes) + 1);

738 }	759 }

739 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {	760 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {

740 if (mime_type == kUnknownMimeTypes[i]) {	761 if (mime_type == kUnknownMimeTypes[i]) {

741 counter->Add(i);	762 counter->Add(i);

742 return true;	763 return true;

(...skipping 21 matching lines...) Expand all Loading...
764 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);	785 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);

765	786

766 // Technically, the crx magic number is just Cr24, but the bytes after that	787 // Technically, the crx magic number is just Cr24, but the bytes after that

767 // are a version number which changes infrequently. Including it in the	788 // are a version number which changes infrequently. Including it in the

768 // sniffing gives us less room for error. If the version number ever changes,	789 // sniffing gives us less room for error. If the version number ever changes,

769 // we can just add an entry to this list.	790 // we can just add an entry to this list.

770 //	791 //

771 // TODO(aa): If we ever have another magic number, we'll want to pass a	792 // TODO(aa): If we ever have another magic number, we'll want to pass a

772 // histogram into CheckForMagicNumbers(), below, to see which one matched.	793 // histogram into CheckForMagicNumbers(), below, to see which one matched.

773 static const struct MagicNumber kCRXMagicNumbers[] = {	794 static const struct MagicNumber kCRXMagicNumbers[] = {

774 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")	795 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")};

775 };

776	796

777 // Only consider files that have the extension ".crx".	797 // Only consider files that have the extension ".crx".

778 static const char kCRXExtension[] = ".crx";	798 static const char kCRXExtension[] = ".crx";

779 // Ignore null by subtracting 1.	799 // Ignore null by subtracting 1.

780 static const int kExtensionLength = arraysize(kCRXExtension) - 1;	800 static const int kExtensionLength = arraysize(kCRXExtension) - 1;

781 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) ==	801 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) ==

782 url.path().size() - kExtensionLength) {	802 url.path().size() - kExtensionLength) {

783 counter->Add(1);	803 counter->Add(1);

784 } else {	804 } else {

785 return false;	805 return false;

786 }	806 }

787	807

788 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);	808 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);

789 if (CheckForMagicNumbers(content, size,	809 if (CheckForMagicNumbers(content,

790 kCRXMagicNumbers, arraysize(kCRXMagicNumbers),	810 size,

791 NULL, result)) {	811 kCRXMagicNumbers,

	812 arraysize(kCRXMagicNumbers),

	813 NULL,

	814 result)) {

792 counter->Add(2);	815 counter->Add(2);

793 } else {	816 } else {

794 return false;	817 return false;

795 }	818 }

796	819

797 return true;	820 return true;

798 }	821 }

799	822

800 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {	823 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {

801 static base::HistogramBase* should_sniff_counter(NULL);	824 static base::HistogramBase* should_sniff_counter(NULL);

802 if (!should_sniff_counter) {	825 if (!should_sniff_counter) {

803 should_sniff_counter =	826 should_sniff_counter =

804 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);	827 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);

805 }	828 }

806 bool sniffable_scheme = url.is_empty() \|\|	829 bool sniffable_scheme = url.is_empty() \|\| url.SchemeIsHTTPOrHTTPS() \|\|

807 url.SchemeIsHTTPOrHTTPS() \|\|

808 url.SchemeIs("ftp") \|\|	830 url.SchemeIs("ftp") \|\|

809 #if defined(OS_ANDROID)	831 #if defined(OS_ANDROID)

810 url.SchemeIs("content") \|\|	832 url.SchemeIs("content") \|\|

811 #endif	833 #endif

812 url.SchemeIsFile() \|\|	834 url.SchemeIsFile() \|\| url.SchemeIsFileSystem();

813 url.SchemeIsFileSystem();

814 if (!sniffable_scheme) {	835 if (!sniffable_scheme) {

815 should_sniff_counter->Add(1);	836 should_sniff_counter->Add(1);

816 return false;	837 return false;

817 }	838 }

818	839

819 static const char* kSniffableTypes[] = {	840 static const char*

820 // Many web servers are misconfigured to send text/plain for many	841 kSniffableTypes

821 // different types of content.	842 [] = {// Many web servers are misconfigured to send text/plain for
	mmenke 2014/10/10 18:12:39 Just no.
822 "text/plain",	843 // many

823 // We want to sniff application/octet-stream for	844 // different types of content.

824 // application/x-chrome-extension, but nothing else.	845 "text/plain",

825 "application/octet-stream",	846 // We want to sniff application/octet-stream for

826 // XHTML and Atom/RSS feeds are often served as plain xml instead of	847 // application/x-chrome-extension, but nothing else.

827 // their more specific mime types.	848 "application/octet-stream",

828 "text/xml",	849 // XHTML and Atom/RSS feeds are often served as plain xml

829 "application/xml",	850 // instead of

830 // Check for false Microsoft Office MIME types.	851 // their more specific mime types.

831 "application/msword",	852 "text/xml",

832 "application/vnd.ms-excel",	853 "application/xml",

833 "application/vnd.ms-powerpoint",	854 // Check for false Microsoft Office MIME types.

834 "application/vnd.openxmlformats-officedocument.wordprocessingml.document",	855 "application/msword",

835 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",	856 "application/vnd.ms-excel",

836 "application/vnd.openxmlformats-officedocument.presentationml.presentation",	857 "application/vnd.ms-powerpoint",

837 "application/vnd.ms-excel.sheet.macroenabled.12",	858 "application/"

838 "application/vnd.ms-word.document.macroenabled.12",	859 "vnd.openxmlformats-officedocument.wordprocessingml.document",

839 "application/vnd.ms-powerpoint.presentation.macroenabled.12",	860 "application/"

840 "application/mspowerpoint",	861 "vnd.openxmlformats-officedocument.spreadsheetml.sheet",

841 "application/msexcel",	862 "application/"

842 "application/vnd.ms-word",	863 "vnd.openxmlformats-officedocument.presentationml.presentation",

843 "application/vnd.ms-word.document.12",	864 "application/vnd.ms-excel.sheet.macroenabled.12",

844 "application/vnd.msword",	865 "application/vnd.ms-word.document.macroenabled.12",

845 };	866 "application/vnd.ms-powerpoint.presentation.macroenabled.12",

	867 "application/mspowerpoint",

	868 "application/msexcel",

	869 "application/vnd.ms-word",

	870 "application/vnd.ms-word.document.12",

	871 "application/vnd.msword",

	872 };

846 static base::HistogramBase* counter(NULL);	873 static base::HistogramBase* counter(NULL);

847 if (!counter) {	874 if (!counter) {

848 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",	875 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",

849 arraysize(kSniffableTypes) + 1);	876 arraysize(kSniffableTypes) + 1);

850 }	877 }

851 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {	878 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {

852 if (mime_type == kSniffableTypes[i]) {	879 if (mime_type == kSniffableTypes[i]) {

853 counter->Add(i);	880 counter->Add(i);

854 should_sniff_counter->Add(2);	881 should_sniff_counter->Add(2);

855 return true;	882 return true;

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
923 // We're not interested in sniffing these types for images and the like.	950 // We're not interested in sniffing these types for images and the like.

924 // Instead, we're looking explicitly for a feed. If we don't find one	951 // Instead, we're looking explicitly for a feed. If we don't find one

925 // we're done and return early.	952 // we're done and return early.

926 if (SniffXML(content, content_size, &have_enough_content, result))	953 if (SniffXML(content, content_size, &have_enough_content, result))

927 return true;	954 return true;

928 return have_enough_content;	955 return have_enough_content;

929 }	956 }

930	957

931 // CRX files (Chrome extensions) have a special sniffing algorithm. It is	958 // CRX files (Chrome extensions) have a special sniffing algorithm. It is

932 // tighter than the others because we don't have to match legacy behavior.	959 // tighter than the others because we don't have to match legacy behavior.

933 if (SniffCRX(content, content_size, url, type_hint,	960 if (SniffCRX(

934 &have_enough_content, result))	961 content, content_size, url, type_hint, &have_enough_content, result))

935 return true;	962 return true;

936	963

937 // Check the file extension and magic numbers to see if this is an Office	964 // Check the file extension and magic numbers to see if this is an Office

938 // document. This needs to be checked before the general magic numbers	965 // document. This needs to be checked before the general magic numbers

939 // because zip files and Office documents (OOXML) have the same magic number.	966 // because zip files and Office documents (OOXML) have the same magic number.

940 if (SniffForOfficeDocs(content, content_size, url,	967 if (SniffForOfficeDocs(

941 &have_enough_content, result))	968 content, content_size, url, &have_enough_content, result))

942 return true; // We've matched a magic number. No more content needed.	969 return true; // We've matched a magic number. No more content needed.

943	970

944 // We're not interested in sniffing for magic numbers when the type_hint	971 // We're not interested in sniffing for magic numbers when the type_hint

945 // is application/octet-stream. Time to bail out.	972 // is application/octet-stream. Time to bail out.

946 if (type_hint == "application/octet-stream")	973 if (type_hint == "application/octet-stream")

947 return have_enough_content;	974 return have_enough_content;

948	975

949 // Now we look in our large table of magic numbers to see if we can find	976 // Now we look in our large table of magic numbers to see if we can find

950 // anything that matches the content.	977 // anything that matches the content.

951 if (SniffForMagicNumbers(content, content_size,	978 if (SniffForMagicNumbers(content, content_size, &have_enough_content, result))

952 &have_enough_content, result))

953 return true; // We've matched a magic number. No more content needed.	979 return true; // We've matched a magic number. No more content needed.

954	980

955 return have_enough_content;	981 return have_enough_content;

956 }	982 }

957	983

958 bool SniffMimeTypeFromLocalData(const char* content,	984 bool SniffMimeTypeFromLocalData(const char* content,

959 size_t size,	985 size_t size,

960 std::string* result) {	986 std::string* result) {

961 // First check the extra table.	987 // First check the extra table.

962 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers,	988 if (CheckForMagicNumbers(content,

963 arraysize(kExtraMagicNumbers), NULL, result))	989 size,

	990 kExtraMagicNumbers,

	991 arraysize(kExtraMagicNumbers),

	992 NULL,

	993 result))

964 return true;	994 return true;

965 // Finally check the original table.	995 // Finally check the original table.

966 return CheckForMagicNumbers(content, size, kMagicNumbers,	996 return CheckForMagicNumbers(

967 arraysize(kMagicNumbers), NULL, result);	997 content, size, kMagicNumbers, arraysize(kMagicNumbers), NULL, result);

968 }	998 }

969	999

970 } // namespace net	1000 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/mime_sniffer.h ('k') | net/base/mime_sniffer_unittest.cc » ('j') | net/base/mime_sniffer_unittest.cc » ('J')