third_party/WebKit/Source/platform/mhtml/MHTMLParser.cpp - Issue 2199493002: libFuzzer for blink::MHTMLParser

Side by Side Diff: third_party/WebKit/Source/platform/mhtml/MHTMLParser.cpp

Issue 2199493002: libFuzzer for blink::MHTMLParser (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Don't call base::i18n::InitializeICU during setup of regular unit tests. Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« third_party/WebKit/Source/platform/mhtml/MHTMLFuzzer.cpp ('K') | « third_party/WebKit/Source/platform/mhtml/MHTMLFuzzer.cpp ('k') | third_party/WebKit/Source/platform/network/ParsedContentType.cpp » ('j') | third_party/WebKit/Source/platform/testing/TestingPlatformSupport.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2011 Google Inc. All rights reserved.	2 * Copyright (C) 2011 Google Inc. All rights reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions are	5 * modification, are permitted provided that the following conditions are

6 * met:	6 * met:

7 *	7 *

8 * * Redistributions of source code must retain the above copyright	8 * * Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * * Redistributions in binary form must reproduce the above	10 * * Redistributions in binary form must reproduce the above

(...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
96 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer)	96 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer)

97 {	97 {

98 KeyValueMap keyValuePairs;	98 KeyValueMap keyValuePairs;

99 String line;	99 String line;

100 String key;	100 String key;

101 StringBuilder value;	101 StringBuilder value;

102 while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {	102 while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {

103 if (line.isEmpty())	103 if (line.isEmpty())

104 break; // Empty line means end of key/value section.	104 break; // Empty line means end of key/value section.

105 if (line[0] == '\t') {	105 if (line[0] == '\t') {

106 ASSERT(!key.isEmpty());

107 value.append(line.substring(1));	106 value.append(line.substring(1));

108 continue;	107 continue;

109 }	108 }

110 // New key/value, store the previous one if any.	109 // New key/value, store the previous one if any.

111 if (!key.isEmpty()) {	110 if (!key.isEmpty()) {

112 if (keyValuePairs.find(key) != keyValuePairs.end())	111 if (keyValuePairs.find(key) != keyValuePairs.end())

113 DLOG(ERROR) << "Key duplicate found in MIME header. Key is '" << key << "', previous value replaced.";	112 DVLOG(1) << "Key duplicate found in MIME header. Key is '" << key << "', previous value replaced.";
	esprehn 2016/08/04 04:37:53: Why did you need to change these?
	Łukasz Anforowicz 2016/08/04 17:37:30: On 2016/08/04 04:37:53, esprehn wrote: > Why did you need to change these? To make debugging of libFuzzer functionality easier - without this change, the output from libFuzzer is drowned in the output from the DLOG(ERROR) statements. I think this is an okay change to make: 1) these were debug-only logging statements, so the change has no impact on the product 2) dev impact is minimal - it is easy to selectively turn these logging statements back on. hmmm... I thought that something like --vmodule=MHTMLFuzzer=1 would work, but it doesn't (I've never used this for Blink side of things - not sure how logging infrastructure extracts the name of a module in case of blink...). WDYT? I can turn these back into DLOG(ERROR)-s if you feel strongly about it.
114 keyValuePairs.add(key, value.toString().stripWhiteSpace());	113 keyValuePairs.add(key, value.toString().stripWhiteSpace());

115 key = String();	114 key = String();

116 value.clear();	115 value.clear();

117 }	116 }

118 size_t semiColonIndex = line.find(':');	117 size_t semiColonIndex = line.find(':');

119 if (semiColonIndex == kNotFound) {	118 if (semiColonIndex == kNotFound) {

120 // This is not a key value pair, ignore.	119 // This is not a key value pair, ignore.

121 continue;	120 continue;

122 }	121 }

123 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace();	122 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace();

(...skipping 12 matching lines...) Expand all Loading...
136 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-type");	135 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-type");

137 if (mimeParametersIterator != keyValuePairs.end()) {	136 if (mimeParametersIterator != keyValuePairs.end()) {

138 ParsedContentType parsedContentType(mimeParametersIterator->value);	137 ParsedContentType parsedContentType(mimeParametersIterator->value);

139 mimeHeader->m_contentType = parsedContentType.mimeType();	138 mimeHeader->m_contentType = parsedContentType.mimeType();

140 if (!mimeHeader->isMultipart()) {	139 if (!mimeHeader->isMultipart()) {

141 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace();	140 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace();

142 } else {	141 } else {

143 mimeHeader->m_multipartType = parsedContentType.parameterValueForName("type");	142 mimeHeader->m_multipartType = parsedContentType.parameterValueForName("type");

144 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueForName("boundary");	143 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueForName("boundary");

145 if (mimeHeader->m_endOfPartBoundary.isNull()) {	144 if (mimeHeader->m_endOfPartBoundary.isNull()) {

146 DLOG(ERROR) << "No boundary found in multipart MIME header.";	145 DVLOG(1) << "No boundary found in multipart MIME header.";

147 return nullptr;	146 return nullptr;

148 }	147 }

149 mimeHeader->m_endOfPartBoundary.insert("--", 0);	148 mimeHeader->m_endOfPartBoundary.insert("--", 0);

150 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundary;	149 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundary;

151 mimeHeader->m_endOfDocumentBoundary.append("--");	150 mimeHeader->m_endOfDocumentBoundary.append("--");

152 }	151 }

153 }	152 }

154	153

155 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding");	154 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding");

156 if (mimeParametersIterator != keyValuePairs.end())	155 if (mimeParametersIterator != keyValuePairs.end())

(...skipping 17 matching lines...) Expand all Loading...
174 if (encoding == "base64")	173 if (encoding == "base64")

175 return Base64;	174 return Base64;

176 if (encoding == "quoted-printable")	175 if (encoding == "quoted-printable")

177 return QuotedPrintable;	176 return QuotedPrintable;

178 if (encoding == "8bit")	177 if (encoding == "8bit")

179 return EightBit;	178 return EightBit;

180 if (encoding == "7bit")	179 if (encoding == "7bit")

181 return SevenBit;	180 return SevenBit;

182 if (encoding == "binary")	181 if (encoding == "binary")

183 return Binary;	182 return Binary;

184 DLOG(ERROR) << "Unknown encoding '" << text << "' found in MIME header.";	183 DVLOG(1) << "Unknown encoding '" << text << "' found in MIME header.";

185 return Unknown;	184 return Unknown;

186 }	185 }

187	186

188 MIMEHeader::MIMEHeader()	187 MIMEHeader::MIMEHeader()

189 : m_contentTransferEncoding(Unknown)	188 : m_contentTransferEncoding(Unknown)

190 {	189 {

191 }	190 }

192	191

193 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, const String& boundary)	192 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, const String& boundary)

194 {	193 {

(...skipping 15 matching lines...) Expand all Loading...
210 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader);	209 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader);

211 HeapVector<Member<ArchiveResource>> resources;	210 HeapVector<Member<ArchiveResource>> resources;

212 if (!parseArchiveWithHeader(header, resources))	211 if (!parseArchiveWithHeader(header, resources))

213 resources.clear();	212 resources.clear();

214 return resources;	213 return resources;

215 }	214 }

216	215

217 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<ArchiveResource>>& resources)	216 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<ArchiveResource>>& resources)

218 {	217 {

219 if (!header) {	218 if (!header) {

220 DLOG(ERROR) << "Failed to parse MHTML part: no header.";	219 DVLOG(1) << "Failed to parse MHTML part: no header.";

221 return false;	220 return false;

222 }	221 }

223	222

224 if (!header->isMultipart()) {	223 if (!header->isMultipart()) {

225 // With IE a page with no resource is not multi-part.	224 // With IE a page with no resource is not multi-part.

226 bool endOfArchiveReached = false;	225 bool endOfArchiveReached = false;

227 ArchiveResource* resource = parseNextPart(*header, String(), String(), endOfArchiveReached);	226 ArchiveResource* resource = parseNextPart(*header, String(), String(), endOfArchiveReached);

228 if (!resource)	227 if (!resource)

229 return false;	228 return false;

230 resources.append(resource);	229 resources.append(resource);

231 return true;	230 return true;

232 }	231 }

233	232

234 // Skip the message content (it's a generic browser specific message).	233 // Skip the message content (it's a generic browser specific message).

235 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());	234 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());

236	235

237 bool endOfArchive = false;	236 bool endOfArchive = false;

238 while (!endOfArchive) {	237 while (!endOfArchive) {

239 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader);	238 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader);

240 if (!resourceHeader) {	239 if (!resourceHeader) {

241 DLOG(ERROR) << "Failed to parse MHTML, invalid MIME header.";	240 DVLOG(1) << "Failed to parse MHTML, invalid MIME header.";

242 return false;	241 return false;

243 }	242 }

244 if (resourceHeader->contentType() == "multipart/alternative") {	243 if (resourceHeader->contentType() == "multipart/alternative") {

245 // Ignore IE nesting which makes little sense (IE seems to nest only some of the frames).	244 // Ignore IE nesting which makes little sense (IE seems to nest only some of the frames).

246 if (!parseArchiveWithHeader(resourceHeader, resources)) {	245 if (!parseArchiveWithHeader(resourceHeader, resources)) {

247 DLOG(ERROR) << "Failed to parse MHTML subframe.";	246 DVLOG(1) << "Failed to parse MHTML subframe.";

248 return false;	247 return false;

249 }	248 }

250 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());	249 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());

251 ASSERT_UNUSED(endOfPartReached, endOfPartReached);

252 continue;	250 continue;

253 }	251 }

254	252

255 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive);	253 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive);

256 if (!resource) {	254 if (!resource) {

257 DLOG(ERROR) << "Failed to parse MHTML part.";	255 DVLOG(1) << "Failed to parse MHTML part.";

258 return false;	256 return false;

259 }	257 }

260 resources.append(resource);	258 resources.append(resource);

261 }	259 }

262 return true;	260 return true;

263 }	261 }

264	262

265	263

266 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)	264 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)

267 {	265 {

268 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());	266 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());

269	267

270 // If no content transfer encoding is specified, default to binary encoding.	268 // If no content transfer encoding is specified, default to binary encoding.

271 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEncoding();	269 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEncoding();

272 if (contentTransferEncoding == MIMEHeader::Unknown)	270 if (contentTransferEncoding == MIMEHeader::Unknown)

273 contentTransferEncoding = MIMEHeader::Binary;	271 contentTransferEncoding = MIMEHeader::Binary;

274	272

275 RefPtr<SharedBuffer> content = SharedBuffer::create();	273 RefPtr<SharedBuffer> content = SharedBuffer::create();

276 const bool checkBoundary = !endOfPartBoundary.isEmpty();	274 const bool checkBoundary = !endOfPartBoundary.isEmpty();

277 bool endOfPartReached = false;	275 bool endOfPartReached = false;

278 if (contentTransferEncoding == MIMEHeader::Binary) {	276 if (contentTransferEncoding == MIMEHeader::Binary) {

279 if (!checkBoundary) {	277 if (!checkBoundary) {

280 DLOG(ERROR) << "Binary contents requires end of part";	278 DVLOG(1) << "Binary contents requires end of part";

281 return nullptr;	279 return nullptr;

282 }	280 }

283 m_lineReader.setSeparator(endOfPartBoundary.utf8().data());	281 m_lineReader.setSeparator(endOfPartBoundary.utf8().data());

284 Vector<char> part;	282 Vector<char> part;

285 if (!m_lineReader.nextChunk(part)) {	283 if (!m_lineReader.nextChunk(part)) {

286 DLOG(ERROR) << "Binary contents requires end of part";	284 DVLOG(1) << "Binary contents requires end of part";

287 return nullptr;	285 return nullptr;

288 }	286 }

289 content->append(part);	287 content->append(part);

290 m_lineReader.setSeparator("\r\n");	288 m_lineReader.setSeparator("\r\n");

291 Vector<char> nextChars;	289 Vector<char> nextChars;

292 if (m_lineReader.peek(nextChars, 2) != 2) {	290 if (m_lineReader.peek(nextChars, 2) != 2) {

293 DLOG(ERROR) << "Invalid seperator.";	291 DVLOG(1) << "Invalid seperator.";

294 return nullptr;	292 return nullptr;

295 }	293 }

296 endOfPartReached = true;	294 endOfPartReached = true;

297 ASSERT(nextChars.size() == 2);	295 ASSERT(nextChars.size() == 2);

298 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');	296 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');

299 if (!endOfArchiveReached) {	297 if (!endOfArchiveReached) {

300 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();	298 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();

301 if (!line.isEmpty()) {	299 if (!line.isEmpty()) {

302 DLOG(ERROR) << "No CRLF at end of binary section.";	300 DVLOG(1) << "No CRLF at end of binary section.";

303 return nullptr;	301 return nullptr;

304 }	302 }

305 }	303 }

306 } else {	304 } else {

307 String line;	305 String line;

308 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {	306 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {

309 endOfArchiveReached = (line == endOfDocumentBoundary);	307 endOfArchiveReached = (line == endOfDocumentBoundary);

310 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {	308 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {

311 endOfPartReached = true;	309 endOfPartReached = true;

312 break;	310 break;

313 }	311 }

314 // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.	312 // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.

315 content->append(line.utf8().data(), line.length());	313 content->append(line.utf8().data(), line.length());

316 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {	314 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {

317 // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.	315 // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.

318 content->append("\r\n", 2u);	316 content->append("\r\n", 2u);

319 }	317 }

320 }	318 }

321 }	319 }

322 if (!endOfPartReached && checkBoundary) {	320 if (!endOfPartReached && checkBoundary) {

323 DLOG(ERROR) << "No bounday found for MHTML part.";	321 DVLOG(1) << "No boundary found for MHTML part.";

324 return nullptr;	322 return nullptr;

325 }	323 }

326	324

327 Vector<char> data;	325 Vector<char> data;

328 switch (contentTransferEncoding) {	326 switch (contentTransferEncoding) {

329 case MIMEHeader::Base64:	327 case MIMEHeader::Base64:

330 if (!base64Decode(content->data(), content->size(), data)) {	328 if (!base64Decode(content->data(), content->size(), data)) {

331 DLOG(ERROR) << "Invalid base64 content for MHTML part.";	329 DVLOG(1) << "Invalid base64 content for MHTML part.";

332 return nullptr;	330 return nullptr;

333 }	331 }

334 break;	332 break;

335 case MIMEHeader::QuotedPrintable:	333 case MIMEHeader::QuotedPrintable:

336 quotedPrintableDecode(content->data(), content->size(), data);	334 quotedPrintableDecode(content->data(), content->size(), data);

337 break;	335 break;

338 case MIMEHeader::EightBit:	336 case MIMEHeader::EightBit:

339 case MIMEHeader::SevenBit:	337 case MIMEHeader::SevenBit:

340 case MIMEHeader::Binary:	338 case MIMEHeader::Binary:

341 data.append(content->data(), content->size());	339 data.append(content->data(), content->size());

342 break;	340 break;

343 default:	341 default:

344 DLOG(ERROR) << "Invalid encoding for MHTML part.";	342 DVLOG(1) << "Invalid encoding for MHTML part.";

345 return nullptr;	343 return nullptr;

346 }	344 }

347 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);	345 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);

348 // FIXME: the URL in the MIME header could be relative, we should resolve it if it is.	346 // FIXME: the URL in the MIME header could be relative, we should resolve it if it is.

349 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2557#section-5	347 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2557#section-5

350 // IE and Firefox (UNMht) seem to generate only absolute URLs.	348 // IE and Firefox (UNMht) seem to generate only absolute URLs.

351 KURL location = KURL(KURL(), mimeHeader.contentLocation());	349 KURL location = KURL(KURL(), mimeHeader.contentLocation());

352 return ArchiveResource::create(	350 return ArchiveResource::create(

353 contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader.contentType()), AtomicString(mimeHeader.charset()));	351 contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader.contentType()), AtomicString(mimeHeader.charset()));

354 }	352 }

(...skipping 14 matching lines...) Expand all Loading...
369 if (!contentID.startsWith('<') \|\| !contentID.endsWith('>'))	367 if (!contentID.startsWith('<') \|\| !contentID.endsWith('>'))

370 return KURL();	368 return KURL();

371	369

372 StringBuilder uriBuilder;	370 StringBuilder uriBuilder;

373 uriBuilder.append("cid:");	371 uriBuilder.append("cid:");

374 uriBuilder.append(contentID, 1, contentID.length() - 2);	372 uriBuilder.append(contentID, 1, contentID.length() - 2);

375 return KURL(KURL(), uriBuilder.toString());	373 return KURL(KURL(), uriBuilder.toString());

376 }	374 }

377	375

378 } // namespace blink	376 } // namespace blink

OLD	NEW