net/base/mime_sniffer.cc - Issue 649763002: git cl format the second third of the net/base directory

Side by Side Diff: net/base/mime_sniffer.cc

Issue 649763002: git cl format the second third of the net/base directory (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Detecting mime types is a tricky business because we need to balance	5 // Detecting mime types is a tricky business because we need to balance

6 // compatibility concerns with security issues. Here is a survey of how other	6 // compatibility concerns with security issues. Here is a survey of how other

7 // browsers behave and then a description of how we intend to behave.	7 // browsers behave and then a description of how we intend to behave.

8 //	8 //

9 // HTML payload, no Content-Type header:	9 // HTML payload, no Content-Type header:

10 // * IE 7: Render as HTML	10 // * IE 7: Render as HTML

(...skipping 98 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
109 // to increase this number if you add a longer magic number.	109 // to increase this number if you add a longer magic number.

110 static const size_t kBytesRequiredForMagic = 42;	110 static const size_t kBytesRequiredForMagic = 42;

111	111

112 struct MagicNumber {	112 struct MagicNumber {

113 const char* mime_type;	113 const char* mime_type;

114 const char* magic;	114 const char* magic;

115 size_t magic_len;	115 size_t magic_len;

116 bool is_string;	116 bool is_string;

117 const char* mask; // if set, must have same length as \|magic\|	117 const char* mask; // if set, must have same length as \|magic\|

118 };	118 };

119	119
	davidben 2014/10/10 20:24:16 Okay, the tables in this file are pretty badly messed up. :-) Here's an idea: remove the trailing comma from each of the macros and move them to the array itself. That'll probably unconfuse clang-format. We could maybe also file a bug under the "don't muck about with things you can't parse" category, but I think the comma should be removed from the macros anyway.
120 #define MAGIC_NUMBER(mime_type, magic) \	120 #define MAGIC_NUMBER(mime_type, magic) \

121 { (mime_type), (magic), sizeof(magic)-1, false, NULL },	121 { (mime_type), (magic), sizeof(magic) - 1, false, NULL } \

	122 ,

122	123

123 template <int MagicSize, int MaskSize>	124 template <int MagicSize, int MaskSize>

124 class VerifySizes {	125 class VerifySizes {

125 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal);	126 COMPILE_ASSERT(MagicSize == MaskSize, sizes_must_be_equal);

	127

126 public:	128 public:

127 enum { SIZES = MagicSize };	129 enum { SIZES = MagicSize };

128 };	130 };

129	131

130 #define verified_sizeof(magic, mask) \	132 #define verified_sizeof(magic, mask) \

131 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES	133 VerifySizes<sizeof(magic), sizeof(mask)>::SIZES

132	134

133 #define MAGIC_MASK(mime_type, magic, mask) \	135 #define MAGIC_MASK(mime_type, magic, mask) \

134 { (mime_type), (magic), verified_sizeof(magic, mask)-1, false, (mask) },	136 { (mime_type), (magic), verified_sizeof(magic, mask) - 1, false, (mask) } \

	137 ,

135	138

136 // Magic strings are case insensitive and must not include '\0' characters	139 // Magic strings are case insensitive and must not include '\0' characters

137 #define MAGIC_STRING(mime_type, magic) \	140 #define MAGIC_STRING(mime_type, magic) \

138 { (mime_type), (magic), sizeof(magic)-1, true, NULL },	141 { (mime_type), (magic), sizeof(magic) - 1, true, NULL } \

	142 ,

139	143

140 static const MagicNumber kMagicNumbers[] = {	144 static const MagicNumber kMagicNumbers[] = {

141 // Source: HTML 5 specification	145 // Source: HTML 5 specification

142 MAGIC_NUMBER("application/pdf", "%PDF-")	146 MAGIC_NUMBER("application/pdf",

143 MAGIC_NUMBER("application/postscript", "%!PS-Adobe-")	147 "%PDF-") MAGIC_NUMBER("application/postscript", "%!PS-Adobe-")

144 MAGIC_NUMBER("image/gif", "GIF87a")	148 MAGIC_NUMBER("image/gif", "GIF87a") MAGIC_NUMBER("image/gif", "GIF89a")

145 MAGIC_NUMBER("image/gif", "GIF89a")	149 MAGIC_NUMBER("image/png",

146 MAGIC_NUMBER("image/png", "\x89" "PNG\x0D\x0A\x1A\x0A")	150 "\x89"

147 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF")	151 "PNG\x0D\x0A\x1A\x0A")

148 MAGIC_NUMBER("image/bmp", "BM")	152 MAGIC_NUMBER("image/jpeg", "\xFF\xD8\xFF")

149 // Source: Mozilla	153 MAGIC_NUMBER("image/bmp", "BM")

150 MAGIC_NUMBER("text/plain", "#!") // Script	154 // Source: Mozilla

151 MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS	155 MAGIC_NUMBER("text/plain", "#!") // Script

152 MAGIC_NUMBER("text/plain", "From")	156 MAGIC_NUMBER("text/plain", "%!") // Script, similar to PS

153 MAGIC_NUMBER("text/plain", ">From")	157 MAGIC_NUMBER("text/plain", "From") MAGIC_NUMBER("text/plain", ">From")

154 // Chrome specific	158 // Chrome specific

155 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08")	159 MAGIC_NUMBER("application/x-gzip", "\x1F\x8B\x08")

156 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46")	160 MAGIC_NUMBER("audio/x-pn-realaudio", "\x2E\x52\x4D\x46") MAGIC_NUMBER(

157 MAGIC_NUMBER("video/x-ms-asf",	161 "video/x-ms-asf",

158 "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C")	162 "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C")

159 MAGIC_NUMBER("image/tiff", "I I")	163 MAGIC_NUMBER("image/tiff", "I I") MAGIC_NUMBER(

160 MAGIC_NUMBER("image/tiff", "II*")	164 "image/tiff",

161 MAGIC_NUMBER("image/tiff", "MM\x00*")	165 "II*") MAGIC_NUMBER("image/tiff",

162 MAGIC_NUMBER("audio/mpeg", "ID3")	166 "MM\x00*") MAGIC_NUMBER("audio/mpeg", "ID3")

163 MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ")	167 MAGIC_NUMBER("image/webp", "RIFF....WEBPVP8 ") MAGIC_NUMBER(

164 MAGIC_NUMBER("video/webm", "\x1A\x45\xDF\xA3")	168 "video/webm",

165 // TODO(abarth): we don't handle partial byte matches yet	169 "\x1A\x45\xDF\xA3")

166 // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB")	170 // TODO(abarth): we don't handle partial byte matches yet

167 // MAGIC_NUMBER("audio/mpeg", "\xFF\xE")	171 // MAGIC_NUMBER("video/mpeg", "\x00\x00\x01\xB")

168 // MAGIC_NUMBER("audio/mpeg", "\xFF\xF")	172 // MAGIC_NUMBER("audio/mpeg", "\xFF\xE")

169 MAGIC_NUMBER("application/zip", "PK\x03\x04")	173 // MAGIC_NUMBER("audio/mpeg", "\xFF\xF")

170 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00")	174 MAGIC_NUMBER("application/zip", "PK\x03\x04")

171 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A")	175 MAGIC_NUMBER("application/x-rar-compressed", "Rar!\x1A\x07\x00")

172 MAGIC_NUMBER("application/octet-stream", "MZ") // EXE	176 MAGIC_NUMBER("application/x-msmetafile", "\xD7\xCD\xC6\x9A")

173 // Sniffing for Flash:	177 MAGIC_NUMBER("application/octet-stream", "MZ") // EXE

174 //	178 // Sniffing for Flash:

175 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS")	179 //

176 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV")	180 // MAGIC_NUMBER("application/x-shockwave-flash", "CWS")

177 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS")	181 // MAGIC_NUMBER("application/x-shockwave-flash", "FLV")

178 //	182 // MAGIC_NUMBER("application/x-shockwave-flash", "FWS")

179 // Including these magic number for Flash is a trade off.	183 //

180 //	184 // Including these magic number for Flash is a trade off.

181 // Pros:	185 //

182 // * Flash is an important and popular file format	186 // Pros:

183 //	187 // * Flash is an important and popular file format

184 // Cons:	188 //

185 // * These patterns are fairly weak	189 // Cons:

186 // * If we mistakenly decide something is Flash, we will execute it	190 // * These patterns are fairly weak

187 // in the origin of an unsuspecting site. This could be a security	191 // * If we mistakenly decide something is Flash, we will execute it

188 // vulnerability if the site allows users to upload content.	192 // in the origin of an unsuspecting site. This could be a security

189 //	193 // vulnerability if the site allows users to upload content.

190 // On balance, we do not include these patterns.	194 //

	195 // On balance, we do not include these patterns.

191 };	196 };

192	197

193 // The number of content bytes we need to use all our Microsoft Office magic	198 // The number of content bytes we need to use all our Microsoft Office magic

194 // numbers.	199 // numbers.

195 static const size_t kBytesRequiredForOfficeMagic = 8;	200 static const size_t kBytesRequiredForOfficeMagic = 8;

196	201

197 static const MagicNumber kOfficeMagicNumbers[] = {	202 static const MagicNumber kOfficeMagicNumbers[] = {

198 MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")	203 MAGIC_NUMBER("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")

199 MAGIC_NUMBER("OOXML", "PK\x03\x04")	204 MAGIC_NUMBER("OOXML", "PK\x03\x04")};

200 };

201	205

202 enum OfficeDocType {	206 enum OfficeDocType {

203 DOC_TYPE_WORD,	207 DOC_TYPE_WORD,

204 DOC_TYPE_EXCEL,	208 DOC_TYPE_EXCEL,

205 DOC_TYPE_POWERPOINT,	209 DOC_TYPE_POWERPOINT,

206 DOC_TYPE_NONE	210 DOC_TYPE_NONE

207 };	211 };

208	212

209 struct OfficeExtensionType {	213 struct OfficeExtensionType {

210 OfficeDocType doc_type;	214 OfficeDocType doc_type;

211 const char* extension;	215 const char* extension;

212 size_t extension_len;	216 size_t extension_len;

213 };	217 };

214	218

215 #define OFFICE_EXTENSION(type, extension) \	219 #define OFFICE_EXTENSION(type, extension) \

216 { (type), (extension), sizeof(extension) - 1 },	220 { (type), (extension), sizeof(extension) - 1 } \

	221 ,

217	222

218 static const OfficeExtensionType kOfficeExtensionTypes[] = {	223 static const OfficeExtensionType kOfficeExtensionTypes[] = {

219 OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc")	224 OFFICE_EXTENSION(DOC_TYPE_WORD, ".doc")

220 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls")	225 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xls")

221 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt")	226 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".ppt")

222 OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx")	227 OFFICE_EXTENSION(DOC_TYPE_WORD, ".docx")

223 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx")	228 OFFICE_EXTENSION(DOC_TYPE_EXCEL, ".xlsx")

224 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")	229 OFFICE_EXTENSION(DOC_TYPE_POWERPOINT, ".pptx")};

225 };

226	230

227 static const MagicNumber kExtraMagicNumbers[] = {	231 static const MagicNumber kExtraMagicNumbers[] = {

228 MAGIC_NUMBER("image/x-xbitmap", "#define")	232 MAGIC_NUMBER("image/x-xbitmap", "#define") MAGIC_NUMBER(

229 MAGIC_NUMBER("image/x-icon", "\x00\x00\x01\x00")	233 "image/x-icon",

230 MAGIC_NUMBER("image/svg+xml", "<?xml_version=")	234 "\x00\x00\x01\x00") MAGIC_NUMBER("image/svg+xml", "<?xml_version=")

231 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ")	235 MAGIC_NUMBER("audio/wav", "RIFF....WAVEfmt ") MAGIC_NUMBER(

232 MAGIC_NUMBER("video/avi", "RIFF....AVI LIST")	236 "video/avi",

233 MAGIC_NUMBER("audio/ogg", "OggS")	237 "RIFF....AVI LIST") MAGIC_NUMBER("audio/ogg", "OggS")

234 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0")	238 MAGIC_MASK("video/mpeg", "\x00\x00\x01\xB0", "\xFF\xFF\xFF\xF0")

235 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0")	239 MAGIC_MASK("audio/mpeg", "\xFF\xE0", "\xFF\xE0") MAGIC_NUMBER(

236 MAGIC_NUMBER("video/3gpp", "....ftyp3g")	240 "video/3gpp",

237 MAGIC_NUMBER("video/3gpp", "....ftypavcl")	241 "....ftyp3g") MAGIC_NUMBER("video/3gpp", "....ftypavcl")

238 MAGIC_NUMBER("video/mp4", "....ftyp")	242 MAGIC_NUMBER("video/mp4", "....ftyp")

239 MAGIC_NUMBER("video/quicktime", "....moov")	243 MAGIC_NUMBER("video/quicktime", "....moov")

240 MAGIC_NUMBER("application/x-shockwave-flash", "CWS")	244 MAGIC_NUMBER("application/x-shockwave-flash", "CWS")

241 MAGIC_NUMBER("application/x-shockwave-flash", "FWS")	245 MAGIC_NUMBER("application/x-shockwave-flash",

242 MAGIC_NUMBER("video/x-flv", "FLV")	246 "FWS")

243 MAGIC_NUMBER("audio/x-flac", "fLaC")	247 MAGIC_NUMBER("video/x-flv", "FLV")

	248 MAGIC_NUMBER("audio/x-flac", "fLaC")

244	249

245 // RAW image types.	250 // RAW image types.

246 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR")	251 MAGIC_NUMBER("image/x-canon-cr2", "II\x2a\x00\x10\x00\x00\x00CR")

247 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR")	252 MAGIC_NUMBER("image/x-canon-crw", "II\x1a\x00\x00\x00HEAPCCDR")

248 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM")	253 MAGIC_NUMBER("image/x-minolta-mrw", "\x00MRM")

249 MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian	254 MAGIC_NUMBER("image/x-olympus-orf", "MMOR") // big-endian

250 MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian	255 MAGIC_NUMBER("image/x-olympus-orf", "IIRO") // little-endian

251 MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian	256 MAGIC_NUMBER("image/x-olympus-orf", "IIRS") // little-endian

252 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ")	257 MAGIC_NUMBER("image/x-fuji-raf", "FUJIFILMCCD-RAW ")

253 MAGIC_NUMBER("image/x-panasonic-raw",	258 MAGIC_NUMBER("image/x-panasonic-raw",

254 "IIU\x00\x08\x00\x00\x00") // Panasonic .raw	259 "IIU\x00\x08\x00\x00\x00") // Panasonic .raw

255 MAGIC_NUMBER("image/x-panasonic-raw",	260 MAGIC_NUMBER("image/x-panasonic-raw",

256 "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2	261 "IIU\x00\x18\x00\x00\x00") // Panasonic .rw2

257 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw")	262 MAGIC_NUMBER("image/x-phaseone-raw", "MMMMRaw")

258 MAGIC_NUMBER("image/x-x3f", "FOVb")	263 MAGIC_NUMBER("image/x-x3f", "FOVb")};

259 };

260	264

261 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will	265 // Our HTML sniffer differs slightly from Mozilla. For example, Mozilla will

262 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is	266 // decide that a document that begins "<!DOCTYPE SOAP-ENV:Envelope PUBLIC " is

263 // HTML, but we will not.	267 // HTML, but we will not.

264	268

265 #define MAGIC_HTML_TAG(tag) \	269 #define MAGIC_HTML_TAG(tag) MAGIC_STRING("text/html", "<" tag)

266 MAGIC_STRING("text/html", "<" tag)

267	270

268 static const MagicNumber kSniffableTags[] = {	271 static const MagicNumber kSniffableTags[] = {

269 // XML processing directive. Although this is not an HTML mime type, we sniff	272 // XML processing directive. Although this is not an HTML mime type, we

270 // for this in the HTML phase because text/xml is just as powerful as HTML and	273 // sniff

271 // we want to leverage our white space skipping technology.	274 // for this in the HTML phase because text/xml is just as powerful as HTML

272 MAGIC_NUMBER("text/xml", "<?xml") // Mozilla	275 // and

273 // DOCTYPEs	276 // we want to leverage our white space skipping technology.

274 MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec	277 MAGIC_NUMBER("text/xml", "<?xml") // Mozilla

275 // Sniffable tags, ordered by how often they occur in sniffable documents.	278 // DOCTYPEs

276 MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla	279 MAGIC_HTML_TAG("!DOCTYPE html") // HTML5 spec

277 MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla	280 // Sniffable tags, ordered by how often they occur in sniffable documents.

278 MAGIC_HTML_TAG("!--")	281 MAGIC_HTML_TAG("script") // HTML5 spec, Mozilla

279 MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla	282 MAGIC_HTML_TAG("html") // HTML5 spec, Mozilla

280 MAGIC_HTML_TAG("iframe") // Mozilla	283 MAGIC_HTML_TAG("!--") MAGIC_HTML_TAG("head") // HTML5 spec, Mozilla

281 MAGIC_HTML_TAG("h1") // Mozilla	284 MAGIC_HTML_TAG("iframe") // Mozilla

282 MAGIC_HTML_TAG("div") // Mozilla	285 MAGIC_HTML_TAG("h1") // Mozilla

283 MAGIC_HTML_TAG("font") // Mozilla	286 MAGIC_HTML_TAG("div") // Mozilla

284 MAGIC_HTML_TAG("table") // Mozilla	287 MAGIC_HTML_TAG("font") // Mozilla

285 MAGIC_HTML_TAG("a") // Mozilla	288 MAGIC_HTML_TAG("table") // Mozilla

286 MAGIC_HTML_TAG("style") // Mozilla	289 MAGIC_HTML_TAG("a") // Mozilla

287 MAGIC_HTML_TAG("title") // Mozilla	290 MAGIC_HTML_TAG("style") // Mozilla

288 MAGIC_HTML_TAG("b") // Mozilla	291 MAGIC_HTML_TAG("title") // Mozilla

289 MAGIC_HTML_TAG("body") // Mozilla	292 MAGIC_HTML_TAG("b") // Mozilla

290 MAGIC_HTML_TAG("br")	293 MAGIC_HTML_TAG("body") // Mozilla

291 MAGIC_HTML_TAG("p") // Mozilla	294 MAGIC_HTML_TAG("br") MAGIC_HTML_TAG("p") // Mozilla

292 };	295 };

293	296

294 static base::HistogramBase* UMASnifferHistogramGet(const char* name,	297 static base::HistogramBase* UMASnifferHistogramGet(const char* name,

295 int array_size) {	298 int array_size) {

296 base::HistogramBase* counter =	299 base::HistogramBase* counter = base::LinearHistogram::FactoryGet(

297 base::LinearHistogram::FactoryGet(name, 1, array_size - 1, array_size,	300 name,

298 base::HistogramBase::kUmaTargetedHistogramFlag);	301 1,

	302 array_size - 1,

	303 array_size,

	304 base::HistogramBase::kUmaTargetedHistogramFlag);

299 return counter;	305 return counter;

300 }	306 }

301	307

302 // Compare content header to a magic number where magic_entry can contain '.'	308 // Compare content header to a magic number where magic_entry can contain '.'

303 // for single character of anything, allowing some bytes to be skipped.	309 // for single character of anything, allowing some bytes to be skipped.

304 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {	310 static bool MagicCmp(const char* magic_entry, const char* content, size_t len) {

305 while (len) {	311 while (len) {

306 if ((*magic_entry != '.') && (*magic_entry != *content))	312 if ((*magic_entry != '.') && (*magic_entry != *content))

307 return false;	313 return false;

308 ++magic_entry;	314 ++magic_entry;

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
361 }	367 }

362 }	368 }

363	369

364 if (match) {	370 if (match) {

365 result->assign(magic_entry.mime_type);	371 result->assign(magic_entry.mime_type);

366 return true;	372 return true;

367 }	373 }

368 return false;	374 return false;

369 }	375 }

370	376

371 static bool CheckForMagicNumbers(const char* content, size_t size,	377 static bool CheckForMagicNumbers(const char* content,

372 const MagicNumber* magic, size_t magic_len,	378 size_t size,

	379 const MagicNumber* magic,

	380 size_t magic_len,

373 base::HistogramBase* counter,	381 base::HistogramBase* counter,

374 std::string* result) {	382 std::string* result) {

375 for (size_t i = 0; i < magic_len; ++i) {	383 for (size_t i = 0; i < magic_len; ++i) {

376 if (MatchMagicNumber(content, size, magic[i], result)) {	384 if (MatchMagicNumber(content, size, magic[i], result)) {

377 if (counter) counter->Add(static_cast<int>(i));	385 if (counter)

	386 counter->Add(static_cast<int>(i));

378 return true;	387 return true;

379 }	388 }

380 }	389 }

381 return false;	390 return false;

382 }	391 }

383	392

384 // Truncates \|size\| to \|max_size\| and returns true if \|size\| is at least	393 // Truncates \|size\| to \|max_size\| and returns true if \|size\| is at least

385 // \|max_size\|.	394 // \|max_size\|.

386 static bool TruncateSize(const size_t max_size, size_t* size) {	395 static bool TruncateSize(const size_t max_size, size_t* size) {

387 // Keep kMaxBytesToSniff honest.	396 // Keep kMaxBytesToSniff honest.

(...skipping 23 matching lines...) Expand all Loading...
411 for (pos = content; pos < end; ++pos) {	420 for (pos = content; pos < end; ++pos) {

412 if (!IsAsciiWhitespace(*pos))	421 if (!IsAsciiWhitespace(*pos))

413 break;	422 break;

414 }	423 }

415 static base::HistogramBase* counter(NULL);	424 static base::HistogramBase* counter(NULL);

416 if (!counter) {	425 if (!counter) {

417 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",	426 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTags2",

418 arraysize(kSniffableTags));	427 arraysize(kSniffableTags));

419 }	428 }

420 // \|pos\| now points to first non-whitespace character (or at end).	429 // \|pos\| now points to first non-whitespace character (or at end).

421 return CheckForMagicNumbers(pos, end - pos,	430 return CheckForMagicNumbers(pos,

422 kSniffableTags, arraysize(kSniffableTags),	431 end - pos,

423 counter, result);	432 kSniffableTags,

	433 arraysize(kSniffableTags),

	434 counter,

	435 result);

424 }	436 }

425	437

426 // Returns true and sets result if the content matches any of kMagicNumbers.	438 // Returns true and sets result if the content matches any of kMagicNumbers.

427 // Clears have_enough_content if more data could possibly change the result.	439 // Clears have_enough_content if more data could possibly change the result.

428 static bool SniffForMagicNumbers(const char* content,	440 static bool SniffForMagicNumbers(const char* content,

429 size_t size,	441 size_t size,

430 bool* have_enough_content,	442 bool* have_enough_content,

431 std::string* result) {	443 std::string* result) {

432 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);	444 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);

433	445

434 // Check our big table of Magic Numbers	446 // Check our big table of Magic Numbers

435 static base::HistogramBase* counter(NULL);	447 static base::HistogramBase* counter(NULL);

436 if (!counter) {	448 if (!counter) {

437 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",	449 counter = UMASnifferHistogramGet("mime_sniffer.kMagicNumbers2",

438 arraysize(kMagicNumbers));	450 arraysize(kMagicNumbers));

439 }	451 }

440 return CheckForMagicNumbers(content, size,	452 return CheckForMagicNumbers(

441 kMagicNumbers, arraysize(kMagicNumbers),	453 content, size, kMagicNumbers, arraysize(kMagicNumbers), counter, result);

442 counter, result);

443 }	454 }

444	455

445 // Returns true and sets result if the content matches any of	456 // Returns true and sets result if the content matches any of

446 // kOfficeMagicNumbers, and the URL has the proper extension.	457 // kOfficeMagicNumbers, and the URL has the proper extension.

447 // Clears \|have_enough_content\| if more data could possibly change the result.	458 // Clears \|have_enough_content\| if more data could possibly change the result.

448 static bool SniffForOfficeDocs(const char* content,	459 static bool SniffForOfficeDocs(const char* content,

449 size_t size,	460 size_t size,

450 const GURL& url,	461 const GURL& url,

451 bool* have_enough_content,	462 bool* have_enough_content,

452 std::string* result) {	463 std::string* result) {

453 *have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size);	464 *have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size);

454	465

455 // Check our table of magic numbers for Office file types.	466 // Check our table of magic numbers for Office file types.

456 std::string office_version;	467 std::string office_version;

457 if (!CheckForMagicNumbers(content, size,	468 if (!CheckForMagicNumbers(content,

458 kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers),	469 size,

459 NULL, &office_version))	470 kOfficeMagicNumbers,

	471 arraysize(kOfficeMagicNumbers),

	472 NULL,

	473 &office_version))

460 return false;	474 return false;

461	475

462 OfficeDocType type = DOC_TYPE_NONE;	476 OfficeDocType type = DOC_TYPE_NONE;

463 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) {	477 for (size_t i = 0; i < arraysize(kOfficeExtensionTypes); ++i) {

464 std::string url_path = url.path();	478 std::string url_path = url.path();

465	479

466 if (url_path.length() < kOfficeExtensionTypes[i].extension_len)	480 if (url_path.length() < kOfficeExtensionTypes[i].extension_len)

467 continue;	481 continue;

468	482

469 const char* extension =	483 const char* extension =

470 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len];	484 &url_path[url_path.length() - kOfficeExtensionTypes[i].extension_len];

471	485

472 if (0 == base::strncasecmp(extension, kOfficeExtensionTypes[i].extension,	486 if (0 == base::strncasecmp(extension,

	487 kOfficeExtensionTypes[i].extension,

473 kOfficeExtensionTypes[i].extension_len)) {	488 kOfficeExtensionTypes[i].extension_len)) {

474 type = kOfficeExtensionTypes[i].doc_type;	489 type = kOfficeExtensionTypes[i].doc_type;

475 break;	490 break;

476 }	491 }

477 }	492 }

478	493

479 if (type == DOC_TYPE_NONE)	494 if (type == DOC_TYPE_NONE)

480 return false;	495 return false;

481	496

482 if (office_version == "CFB") {	497 if (office_version == "CFB") {

483 switch (type) {	498 switch (type) {

484 case DOC_TYPE_WORD:	499 case DOC_TYPE_WORD:

485 *result = "application/msword";	500 *result = "application/msword";

486 return true;	501 return true;

487 case DOC_TYPE_EXCEL:	502 case DOC_TYPE_EXCEL:

488 *result = "application/vnd.ms-excel";	503 *result = "application/vnd.ms-excel";

489 return true;	504 return true;

490 case DOC_TYPE_POWERPOINT:	505 case DOC_TYPE_POWERPOINT:

491 *result = "application/vnd.ms-powerpoint";	506 *result = "application/vnd.ms-powerpoint";

492 return true;	507 return true;

493 case DOC_TYPE_NONE:	508 case DOC_TYPE_NONE:

494 NOTREACHED();	509 NOTREACHED();

495 return false;	510 return false;

496 }	511 }

497 } else if (office_version == "OOXML") {	512 } else if (office_version == "OOXML") {

498 switch (type) {	513 switch (type) {

499 case DOC_TYPE_WORD:	514 case DOC_TYPE_WORD:

500 *result = "application/vnd.openxmlformats-officedocument."	515 *result =

501 "wordprocessingml.document";	516 "application/vnd.openxmlformats-officedocument."

	517 "wordprocessingml.document";

502 return true;	518 return true;

503 case DOC_TYPE_EXCEL:	519 case DOC_TYPE_EXCEL:

504 *result = "application/vnd.openxmlformats-officedocument."	520 *result =

505 "spreadsheetml.sheet";	521 "application/vnd.openxmlformats-officedocument."

	522 "spreadsheetml.sheet";

506 return true;	523 return true;

507 case DOC_TYPE_POWERPOINT:	524 case DOC_TYPE_POWERPOINT:

508 *result = "application/vnd.openxmlformats-officedocument."	525 *result =

509 "presentationml.presentation";	526 "application/vnd.openxmlformats-officedocument."

	527 "presentationml.presentation";

510 return true;	528 return true;

511 case DOC_TYPE_NONE:	529 case DOC_TYPE_NONE:

512 NOTREACHED();	530 NOTREACHED();

513 return false;	531 return false;

514 }	532 }

515 }	533 }

516	534

517 NOTREACHED();	535 NOTREACHED();

518 return false;	536 return false;

519 }	537 }

520	538

521 static bool IsOfficeType(const std::string& type_hint) {	539 static bool IsOfficeType(const std::string& type_hint) {

522 return (type_hint == "application/msword" \|\|	540 return (type_hint == "application/msword" \|\|

523 type_hint == "application/vnd.ms-excel" \|\|	541 type_hint == "application/vnd.ms-excel" \|\|

524 type_hint == "application/vnd.ms-powerpoint" \|\|	542 type_hint == "application/vnd.ms-powerpoint" \|\|

525 type_hint == "application/vnd.openxmlformats-officedocument."	543 type_hint ==

526 "wordprocessingml.document" \|\|	544 "application/vnd.openxmlformats-officedocument."

527 type_hint == "application/vnd.openxmlformats-officedocument."	545 "wordprocessingml.document" \|\|

528 "spreadsheetml.sheet" \|\|	546 type_hint ==

529 type_hint == "application/vnd.openxmlformats-officedocument."	547 "application/vnd.openxmlformats-officedocument."

530 "presentationml.presentation" \|\|	548 "spreadsheetml.sheet" \|\|

	549 type_hint ==

	550 "application/vnd.openxmlformats-officedocument."

	551 "presentationml.presentation" \|\|

531 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" \|\|	552 type_hint == "application/vnd.ms-excel.sheet.macroenabled.12" \|\|

532 type_hint == "application/vnd.ms-word.document.macroenabled.12" \|\|	553 type_hint == "application/vnd.ms-word.document.macroenabled.12" \|\|

533 type_hint == "application/vnd.ms-powerpoint.presentation."	554 type_hint ==

534 "macroenabled.12" \|\|	555 "application/vnd.ms-powerpoint.presentation."

	556 "macroenabled.12" \|\|

535 type_hint == "application/mspowerpoint" \|\|	557 type_hint == "application/mspowerpoint" \|\|

536 type_hint == "application/msexcel" \|\|	558 type_hint == "application/msexcel" \|\|

537 type_hint == "application/vnd.ms-word" \|\|	559 type_hint == "application/vnd.ms-word" \|\|

538 type_hint == "application/vnd.ms-word.document.12" \|\|	560 type_hint == "application/vnd.ms-word.document.12" \|\|

539 type_hint == "application/vnd.msword");	561 type_hint == "application/vnd.msword");

540 }	562 }

541	563

542 // This function checks for files that have a Microsoft Office MIME type	564 // This function checks for files that have a Microsoft Office MIME type

543 // set, but are not actually Office files.	565 // set, but are not actually Office files.

544 //	566 //

545 // If this is not actually an Office file, \|*result\| is set to	567 // If this is not actually an Office file, \|*result\| is set to

546 // "application/octet-stream", otherwise it is not modified.	568 // "application/octet-stream", otherwise it is not modified.

547 //	569 //

548 // Returns false if additional data is required to determine the file type, or	570 // Returns false if additional data is required to determine the file type, or

549 // true if there is enough data to make a decision.	571 // true if there is enough data to make a decision.

550 static bool SniffForInvalidOfficeDocs(const char* content,	572 static bool SniffForInvalidOfficeDocs(const char* content,

551 size_t size,	573 size_t size,

552 const GURL& url,	574 const GURL& url,

553 std::string* result) {	575 std::string* result) {

554 if (!TruncateSize(kBytesRequiredForOfficeMagic, &size))	576 if (!TruncateSize(kBytesRequiredForOfficeMagic, &size))

555 return false;	577 return false;

556	578

557 // Check our table of magic numbers for Office file types. If it does not	579 // Check our table of magic numbers for Office file types. If it does not

558 // match one, the MIME type was invalid. Set it instead to a safe value.	580 // match one, the MIME type was invalid. Set it instead to a safe value.

559 std::string office_version;	581 std::string office_version;

560 if (!CheckForMagicNumbers(content, size,	582 if (!CheckForMagicNumbers(content,

561 kOfficeMagicNumbers, arraysize(kOfficeMagicNumbers),	583 size,

562 NULL, &office_version)) {	584 kOfficeMagicNumbers,

	585 arraysize(kOfficeMagicNumbers),

	586 NULL,

	587 &office_version)) {

563 *result = "application/octet-stream";	588 *result = "application/octet-stream";

564 }	589 }

565	590

566 // We have enough information to determine if this was a Microsoft Office	591 // We have enough information to determine if this was a Microsoft Office

567 // document or not, so sniffing is completed.	592 // document or not, so sniffing is completed.

568 return true;	593 return true;

569 }	594 }

570	595

571 // Byte order marks	596 // Byte order marks

572 static const MagicNumber kMagicXML[] = {	597 static const MagicNumber kMagicXML[] = {

573 // We want to be very conservative in interpreting text/xml content as	598 // We want to be very conservative in interpreting text/xml content as

574 // XHTML -- we just want to sniff enough to make unit tests pass.	599 // XHTML -- we just want to sniff enough to make unit tests pass.

575 // So we match explicitly on this, and don't match other ways of writing	600 // So we match explicitly on this, and don't match other ways of writing

576 // it in semantically-equivalent ways.	601 // it in semantically-equivalent ways.

577 MAGIC_STRING("application/xhtml+xml",	602 MAGIC_STRING("application/xhtml+xml",

578 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")	603 "<html xmlns=\"http://www.w3.org/1999/xhtml\"")

579 MAGIC_STRING("application/atom+xml", "<feed")	604 MAGIC_STRING("application/atom+xml", "<feed")

580 MAGIC_STRING("application/rss+xml", "<rss") // UTF-8	605 MAGIC_STRING("application/rss+xml", "<rss") // UTF-8

581 };	606 };

582	607

583 // Returns true and sets result if the content appears to contain XHTML or a	608 // Returns true and sets result if the content appears to contain XHTML or a

584 // feed.	609 // feed.

585 // Clears have_enough_content if more data could possibly change the result.	610 // Clears have_enough_content if more data could possibly change the result.

586 //	611 //

587 // TODO(evanm): this is similar but more conservative than what Safari does,	612 // TODO(evanm): this is similar but more conservative than what Safari does,

588 // while HTML5 has a different recommendation -- what should we do?	613 // while HTML5 has a different recommendation -- what should we do?

589 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset	614 // TODO(evanm): this is incorrect for documents whose encoding isn't a superset

590 // of ASCII -- do we care?	615 // of ASCII -- do we care?

591 static bool SniffXML(const char* content,	616 static bool SniffXML(const char* content,

592 size_t size,	617 size_t size,

593 bool* have_enough_content,	618 bool* have_enough_content,

594 std::string* result) {	619 std::string* result) {

595 // We allow at most 300 bytes of content before we expect the opening tag.	620 // We allow at most 300 bytes of content before we expect the opening tag.

596 *have_enough_content &= TruncateSize(300, &size);	621 *have_enough_content &= TruncateSize(300, &size);

597 const char* pos = content;	622 const char* pos = content;

598 const char* const end = content + size;	623 const char* const end = content + size;

599	624

600 // This loop iterates through tag-looking offsets in the file.	625 // This loop iterates through tag-looking offsets in the file.

601 // We want to skip XML processing instructions (of the form "<?xml ...")	626 // We want to skip XML processing instructions (of the form "<?xml ...")

602 // and stop at the first "plain" tag, then make a decision on the mime-type	627 // and stop at the first "plain" tag, then make a decision on the mime-type

603 // based on the name (or possibly attributes) of that tag.	628 // based on the name (or possibly attributes) of that tag.

604 static base::HistogramBase* counter(NULL);	629 static base::HistogramBase* counter(NULL);

605 if (!counter) {	630 if (!counter) {

606 counter = UMASnifferHistogramGet("mime_sniffer.kMagicXML2",	631 counter =

607 arraysize(kMagicXML));	632 UMASnifferHistogramGet("mime_sniffer.kMagicXML2", arraysize(kMagicXML));

608 }	633 }

609 const int kMaxTagIterations = 5;	634 const int kMaxTagIterations = 5;

610 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {	635 for (int i = 0; i < kMaxTagIterations && pos < end; ++i) {

611 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));	636 pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos));

612 if (!pos)	637 if (!pos)

613 return false;	638 return false;

614	639

615 if ((pos + sizeof("<?xml") - 1 <= end) &&	640 if ((pos + sizeof("<?xml") - 1 <= end) &&

616 (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) {	641 (base::strncasecmp(pos, "<?xml", sizeof("<?xml") - 1) == 0)) {

617 // Skip XML declarations.	642 // Skip XML declarations.

618 ++pos;	643 ++pos;

619 continue;	644 continue;

620 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) &&	645 } else if ((pos + sizeof("<!DOCTYPE") - 1 <= end) &&

621 (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) ==	646 (base::strncasecmp(pos, "<!DOCTYPE", sizeof("<!DOCTYPE") - 1) ==

622 0)) {	647 0)) {

623 // Skip DOCTYPE declarations.	648 // Skip DOCTYPE declarations.

624 ++pos;	649 ++pos;

625 continue;	650 continue;

626 }	651 }

627	652

628 if (CheckForMagicNumbers(pos, end - pos,	653 if (CheckForMagicNumbers(

629 kMagicXML, arraysize(kMagicXML),	654 pos, end - pos, kMagicXML, arraysize(kMagicXML), counter, result))

630 counter, result))

631 return true;	655 return true;

632	656

633 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult	657 // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult

634 // to identify.	658 // to identify.

635	659

636 // If we get here, we've hit an initial tag that hasn't matched one of the	660 // If we get here, we've hit an initial tag that hasn't matched one of the

637 // above tests. Abort.	661 // above tests. Abort.

638 return true;	662 return true;

639 }	663 }

640	664

641 // We iterated too far without finding a start tag.	665 // We iterated too far without finding a start tag.

642 // If we have more content to look at, we aren't going to change our mind by	666 // If we have more content to look at, we aren't going to change our mind by

643 // seeing more bytes from the network.	667 // seeing more bytes from the network.

644 return pos < end;	668 return pos < end;

645 }	669 }

646	670

647 // Byte order marks	671 // Byte order marks

648 static const MagicNumber kByteOrderMark[] = {	672 static const MagicNumber kByteOrderMark[] = {

649 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE	673 MAGIC_NUMBER("text/plain", "\xFE\xFF") // UTF-16BE

650 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE	674 MAGIC_NUMBER("text/plain", "\xFF\xFE") // UTF-16LE

651 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8	675 MAGIC_NUMBER("text/plain", "\xEF\xBB\xBF") // UTF-8

652 };	676 };

653	677

654 // Whether a given byte looks like it might be part of binary content.	678 // Whether a given byte looks like it might be part of binary content.

655 // Source: HTML5 spec	679 // Source: HTML5 spec

656 static char kByteLooksBinary[] = {	680 static char kByteLooksBinary[] = {

657 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F	681 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, // 0x00 - 0x0F
	davidben 2014/10/10 20:24:16 Is this a clang-format bug? google-c-style.el says this is two-space. Our existing code seems to mostly do this two-space as well, though not uniformly. I see some four-space in v8. https://code.google.com/p/chromium/codesearch#search/&q=%5C%5B%5C%5D%5C%20=%5...
658 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F	682 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, // 0x10 - 0x1F

659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F	683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F

660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F	684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F

661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F	685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F

662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F	686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F

663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F	687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F

664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F	688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7F

665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F	689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8F

666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F	690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9F

667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF	691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0 - 0xAF

668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF	692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0 - 0xBF

669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF	693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0 - 0xCF

670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF	694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0 - 0xDF

671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF	695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0 - 0xEF

672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF	696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF

673 };	697 };

674	698

675 // Returns true and sets result to "application/octet-stream" if the content	699 // Returns true and sets result to "application/octet-stream" if the content

676 // appears to be binary data. Otherwise, returns false and sets "text/plain".	700 // appears to be binary data. Otherwise, returns false and sets "text/plain".

677 // Clears have_enough_content if more data could possibly change the result.	701 // Clears have_enough_content if more data could possibly change the result.

678 static bool SniffBinary(const char* content,	702 static bool SniffBinary(const char* content,

679 size_t size,	703 size_t size,

680 bool* have_enough_content,	704 bool* have_enough_content,

681 std::string* result) {	705 std::string* result) {

682 // There is no concensus about exactly how to sniff for binary content.	706 // There is no concensus about exactly how to sniff for binary content.

683 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.	707 // * IE 7: Don't sniff for binary looking bytes, but trust the file extension.

684 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.	708 // * Firefox 3.5: Sniff first 4096 bytes for a binary looking byte.

685 // Here, we side with FF, but with a smaller buffer. This size was chosen	709 // Here, we side with FF, but with a smaller buffer. This size was chosen

686 // because it is small enough to comfortably fit into a single packet (after	710 // because it is small enough to comfortably fit into a single packet (after

687 // allowing for headers) and yet large enough to account for binary formats	711 // allowing for headers) and yet large enough to account for binary formats

688 // that have a significant amount of ASCII at the beginning (crbug.com/15314).	712 // that have a significant amount of ASCII at the beginning (crbug.com/15314).

689 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);	713 const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size);

690	714

691 // First, we look for a BOM.	715 // First, we look for a BOM.

692 static base::HistogramBase* counter(NULL);	716 static base::HistogramBase* counter(NULL);

693 if (!counter) {	717 if (!counter) {

694 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",	718 counter = UMASnifferHistogramGet("mime_sniffer.kByteOrderMark2",

695 arraysize(kByteOrderMark));	719 arraysize(kByteOrderMark));

696 }	720 }

697 std::string unused;	721 std::string unused;

698 if (CheckForMagicNumbers(content, size,	722 if (CheckForMagicNumbers(content,

699 kByteOrderMark, arraysize(kByteOrderMark),	723 size,

700 counter, &unused)) {	724 kByteOrderMark,

	725 arraysize(kByteOrderMark),

	726 counter,

	727 &unused)) {

701 // If there is BOM, we think the buffer is not binary.	728 // If there is BOM, we think the buffer is not binary.

702 result->assign("text/plain");	729 result->assign("text/plain");

703 return false;	730 return false;

704 }	731 }

705	732

706 // Next we look to see if any of the bytes "look binary."	733 // Next we look to see if any of the bytes "look binary."

707 for (size_t i = 0; i < size; ++i) {	734 for (size_t i = 0; i < size; ++i) {

708 // If we a see a binary-looking byte, we think the content is binary.	735 // If we a see a binary-looking byte, we think the content is binary.

709 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {	736 if (kByteLooksBinary[static_cast<unsigned char>(content[i])]) {

710 result->assign("application/octet-stream");	737 result->assign("application/octet-stream");

711 return true;	738 return true;

712 }	739 }

713 }	740 }

714	741

715 // No evidence either way. Default to non-binary and, if truncated, clear	742 // No evidence either way. Default to non-binary and, if truncated, clear

716 // have_enough_content because there could be a binary looking byte in the	743 // have_enough_content because there could be a binary looking byte in the

717 // truncated data.	744 // truncated data.

718 *have_enough_content &= is_truncated;	745 *have_enough_content &= is_truncated;

719 result->assign("text/plain");	746 result->assign("text/plain");

720 return false;	747 return false;

721 }	748 }

722	749

723 static bool IsUnknownMimeType(const std::string& mime_type) {	750 static bool IsUnknownMimeType(const std::string& mime_type) {

724 // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here.	751 // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here.

725 // If we do, please be careful not to alter the semantics at all.	752 // If we do, please be careful not to alter the semantics at all.

726 static const char* kUnknownMimeTypes[] = {	753 static const char* kUnknownMimeTypes[] = {

727 // Empty mime types are as unknown as they get.	754 // Empty mime types are as unknown as they get.

728 "",	755 "",

729 // The unknown/unknown type is popular and uninformative	756 // The unknown/unknown type is popular and uninformative

730 "unknown/unknown",	757 "unknown/unknown",

731 // The second most popular unknown mime type is application/unknown	758 // The second most popular unknown mime type is application/unknown

732 "application/unknown",	759 "application/unknown",

733 // Firefox rejects a mime type if it is exactly /	760 // Firefox rejects a mime type if it is exactly /

734 "/",	761 "/",

735 };	762 };

736 static base::HistogramBase* counter(NULL);	763 static base::HistogramBase* counter(NULL);

737 if (!counter) {	764 if (!counter) {

738 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",	765 counter = UMASnifferHistogramGet("mime_sniffer.kUnknownMimeTypes2",

739 arraysize(kUnknownMimeTypes) + 1);	766 arraysize(kUnknownMimeTypes) + 1);

740 }	767 }

741 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {	768 for (size_t i = 0; i < arraysize(kUnknownMimeTypes); ++i) {

742 if (mime_type == kUnknownMimeTypes[i]) {	769 if (mime_type == kUnknownMimeTypes[i]) {

743 counter->Add(i);	770 counter->Add(i);

744 return true;	771 return true;

(...skipping 21 matching lines...) Expand all Loading...
766 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);	793 counter = UMASnifferHistogramGet("mime_sniffer.kSniffCRX", 3);

767	794

768 // Technically, the crx magic number is just Cr24, but the bytes after that	795 // Technically, the crx magic number is just Cr24, but the bytes after that

769 // are a version number which changes infrequently. Including it in the	796 // are a version number which changes infrequently. Including it in the

770 // sniffing gives us less room for error. If the version number ever changes,	797 // sniffing gives us less room for error. If the version number ever changes,

771 // we can just add an entry to this list.	798 // we can just add an entry to this list.

772 //	799 //

773 // TODO(aa): If we ever have another magic number, we'll want to pass a	800 // TODO(aa): If we ever have another magic number, we'll want to pass a

774 // histogram into CheckForMagicNumbers(), below, to see which one matched.	801 // histogram into CheckForMagicNumbers(), below, to see which one matched.

775 static const struct MagicNumber kCRXMagicNumbers[] = {	802 static const struct MagicNumber kCRXMagicNumbers[] = {

776 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")	803 MAGIC_NUMBER("application/x-chrome-extension", "Cr24\x02\x00\x00\x00")};
	davidben 2014/10/10 20:24:16 I feel like the newline between 776 and 777 should have stayed, but I don't care strongly. I suspect doing the trailing comma thing will fix it though.
777 };

778	804

779 // Only consider files that have the extension ".crx".	805 // Only consider files that have the extension ".crx".

780 static const char kCRXExtension[] = ".crx";	806 static const char kCRXExtension[] = ".crx";

781 // Ignore null by subtracting 1.	807 // Ignore null by subtracting 1.

782 static const int kExtensionLength = arraysize(kCRXExtension) - 1;	808 static const int kExtensionLength = arraysize(kCRXExtension) - 1;

783 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) ==	809 if (url.path().rfind(kCRXExtension, std::string::npos, kExtensionLength) ==

784 url.path().size() - kExtensionLength) {	810 url.path().size() - kExtensionLength) {

785 counter->Add(1);	811 counter->Add(1);

786 } else {	812 } else {

787 return false;	813 return false;

788 }	814 }

789	815

790 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);	816 *have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size);

791 if (CheckForMagicNumbers(content, size,	817 if (CheckForMagicNumbers(content,

792 kCRXMagicNumbers, arraysize(kCRXMagicNumbers),	818 size,

793 NULL, result)) {	819 kCRXMagicNumbers,

	820 arraysize(kCRXMagicNumbers),

	821 NULL,

	822 result)) {

794 counter->Add(2);	823 counter->Add(2);

795 } else {	824 } else {

796 return false;	825 return false;

797 }	826 }

798	827

799 return true;	828 return true;

800 }	829 }

801	830

802 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {	831 bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {

803 static base::HistogramBase* should_sniff_counter(NULL);	832 static base::HistogramBase* should_sniff_counter(NULL);

804 if (!should_sniff_counter) {	833 if (!should_sniff_counter) {

805 should_sniff_counter =	834 should_sniff_counter =

806 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);	835 UMASnifferHistogramGet("mime_sniffer.ShouldSniffMimeType2", 3);

807 }	836 }

808 bool sniffable_scheme = url.is_empty() ||	837 bool sniffable_scheme = url.is_empty() || url.SchemeIsHTTPOrHTTPS() ||

809 url.SchemeIsHTTPOrHTTPS() ||

810 url.SchemeIs("ftp") ||	838 url.SchemeIs("ftp") ||

811 #if defined(OS_ANDROID)	839 #if defined(OS_ANDROID)

812 url.SchemeIs("content") ||	840 url.SchemeIs("content") ||

813 #endif	841 #endif

814 url.SchemeIsFile() ||	842 url.SchemeIsFile() || url.SchemeIsFileSystem();

815 url.SchemeIsFileSystem();

816 if (!sniffable_scheme) {	843 if (!sniffable_scheme) {

817 should_sniff_counter->Add(1);	844 should_sniff_counter->Add(1);

818 return false;	845 return false;

819 }	846 }

820	847

821 static const char* kSniffableTypes[] = {	848 static const char* kSniffableTypes[] = {

822 // Many web servers are misconfigured to send text/plain for many	849 // Many web servers are misconfigured to send text/plain for many

823 // different types of content.	850 // different types of content.

824 "text/plain",	851 "text/plain",

825 // We want to sniff application/octet-stream for	852 // We want to sniff application/octet-stream for

826 // application/x-chrome-extension, but nothing else.	853 // application/x-chrome-extension, but nothing else.

827 "application/octet-stream",	854 "application/octet-stream",

828 // XHTML and Atom/RSS feeds are often served as plain xml instead of	855 // XHTML and Atom/RSS feeds are often served as plain xml instead of

829 // their more specific mime types.	856 // their more specific mime types.

830 "text/xml",	857 "text/xml",

831 "application/xml",	858 "application/xml",

832 // Check for false Microsoft Office MIME types.	859 // Check for false Microsoft Office MIME types.

833 "application/msword",	860 "application/msword",

834 "application/vnd.ms-excel",	861 "application/vnd.ms-excel",

835 "application/vnd.ms-powerpoint",	862 "application/vnd.ms-powerpoint",

836 "application/vnd.openxmlformats-officedocument.wordprocessingml.document",	863 "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

837 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",	864 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",

838 "application/vnd.openxmlformats-officedocument.presentationml.presentation",	865 "application/"

839 "application/vnd.ms-excel.sheet.macroenabled.12",	866 "vnd.openxmlformats-officedocument.presentationml.presentation",
	davidben 2014/10/10 20:24:16 This is kind of unfortunate. Worth a clang-format bug to see if they could indent line 866 some? Alternatively, if the two-space/four-space question above is resolved in favor of two-space, I guess it wouldn't need to wrap this.
840 "application/vnd.ms-word.document.macroenabled.12",	867 "application/vnd.ms-excel.sheet.macroenabled.12",

841 "application/vnd.ms-powerpoint.presentation.macroenabled.12",	868 "application/vnd.ms-word.document.macroenabled.12",

842 "application/mspowerpoint",	869 "application/vnd.ms-powerpoint.presentation.macroenabled.12",

843 "application/msexcel",	870 "application/mspowerpoint",

844 "application/vnd.ms-word",	871 "application/msexcel",

845 "application/vnd.ms-word.document.12",	872 "application/vnd.ms-word",

846 "application/vnd.msword",	873 "application/vnd.ms-word.document.12",

	874 "application/vnd.msword",

847 };	875 };

848 static base::HistogramBase* counter(NULL);	876 static base::HistogramBase* counter(NULL);

849 if (!counter) {	877 if (!counter) {

850 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",	878 counter = UMASnifferHistogramGet("mime_sniffer.kSniffableTypes2",

851 arraysize(kSniffableTypes) + 1);	879 arraysize(kSniffableTypes) + 1);

852 }	880 }

853 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {	881 for (size_t i = 0; i < arraysize(kSniffableTypes); ++i) {

854 if (mime_type == kSniffableTypes[i]) {	882 if (mime_type == kSniffableTypes[i]) {

855 counter->Add(i);	883 counter->Add(i);

856 should_sniff_counter->Add(2);	884 should_sniff_counter->Add(2);

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
925 // We're not interested in sniffing these types for images and the like.	953 // We're not interested in sniffing these types for images and the like.

926 // Instead, we're looking explicitly for a feed. If we don't find one	954 // Instead, we're looking explicitly for a feed. If we don't find one

927 // we're done and return early.	955 // we're done and return early.

928 if (SniffXML(content, content_size, &have_enough_content, result))	956 if (SniffXML(content, content_size, &have_enough_content, result))

929 return true;	957 return true;

930 return have_enough_content;	958 return have_enough_content;

931 }	959 }

932	960

933 // CRX files (Chrome extensions) have a special sniffing algorithm. It is	961 // CRX files (Chrome extensions) have a special sniffing algorithm. It is

934 // tighter than the others because we don't have to match legacy behavior.	962 // tighter than the others because we don't have to match legacy behavior.

935 if (SniffCRX(content, content_size, url, type_hint,	963 if (SniffCRX(

936 &have_enough_content, result))	964 content, content_size, url, type_hint, &have_enough_content, result))

937 return true;	965 return true;

938	966

939 // Check the file extension and magic numbers to see if this is an Office	967 // Check the file extension and magic numbers to see if this is an Office

940 // document. This needs to be checked before the general magic numbers	968 // document. This needs to be checked before the general magic numbers

941 // because zip files and Office documents (OOXML) have the same magic number.	969 // because zip files and Office documents (OOXML) have the same magic number.

942 if (SniffForOfficeDocs(content, content_size, url,	970 if (SniffForOfficeDocs(

943 &have_enough_content, result))	971 content, content_size, url, &have_enough_content, result))

944 return true; // We've matched a magic number. No more content needed.	972 return true; // We've matched a magic number. No more content needed.

945	973

946 // We're not interested in sniffing for magic numbers when the type_hint	974 // We're not interested in sniffing for magic numbers when the type_hint

947 // is application/octet-stream. Time to bail out.	975 // is application/octet-stream. Time to bail out.

948 if (type_hint == "application/octet-stream")	976 if (type_hint == "application/octet-stream")

949 return have_enough_content;	977 return have_enough_content;

950	978

951 // Now we look in our large table of magic numbers to see if we can find	979 // Now we look in our large table of magic numbers to see if we can find

952 // anything that matches the content.	980 // anything that matches the content.

953 if (SniffForMagicNumbers(content, content_size,	981 if (SniffForMagicNumbers(content, content_size, &have_enough_content, result))

954 &have_enough_content, result))

955 return true; // We've matched a magic number. No more content needed.	982 return true; // We've matched a magic number. No more content needed.

956	983

957 return have_enough_content;	984 return have_enough_content;

958 }	985 }

959	986

960 bool SniffMimeTypeFromLocalData(const char* content,	987 bool SniffMimeTypeFromLocalData(const char* content,

961 size_t size,	988 size_t size,

962 std::string* result) {	989 std::string* result) {

963 // First check the extra table.	990 // First check the extra table.

964 if (CheckForMagicNumbers(content, size, kExtraMagicNumbers,	991 if (CheckForMagicNumbers(content,

965 arraysize(kExtraMagicNumbers), NULL, result))	992 size,

	993 kExtraMagicNumbers,

	994 arraysize(kExtraMagicNumbers),

	995 NULL,

	996 result))

966 return true;	997 return true;

967 // Finally check the original table.	998 // Finally check the original table.

968 return CheckForMagicNumbers(content, size, kMagicNumbers,	999 return CheckForMagicNumbers(

969 arraysize(kMagicNumbers), NULL, result);	1000 content, size, kMagicNumbers, arraysize(kMagicNumbers), NULL, result);

970 }	1001 }

971	1002

972 } // namespace net	1003 } // namespace net

OLD	NEW

« net/base/load_timing_info_test_util.h ('K') | « net/base/load_timing_info_test_util.h ('k') | net/base/mime_sniffer_unittest.cc » ('j') | net/base/mime_sniffer_unittest.cc » ('J')