Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: third_party/WebKit/Source/platform/mhtml/MHTMLParser.cpp

Issue 2199493002: libFuzzer for blink::MHTMLParser (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed CR feedback from esprehn@. Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer) 96 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer)
97 { 97 {
98 KeyValueMap keyValuePairs; 98 KeyValueMap keyValuePairs;
99 String line; 99 String line;
100 String key; 100 String key;
101 StringBuilder value; 101 StringBuilder value;
102 while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) { 102 while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
103 if (line.isEmpty()) 103 if (line.isEmpty())
104 break; // Empty line means end of key/value section. 104 break; // Empty line means end of key/value section.
105 if (line[0] == '\t') { 105 if (line[0] == '\t') {
106 ASSERT(!key.isEmpty());
107 value.append(line.substring(1)); 106 value.append(line.substring(1));
108 continue; 107 continue;
109 } 108 }
110 // New key/value, store the previous one if any. 109 // New key/value, store the previous one if any.
111 if (!key.isEmpty()) { 110 if (!key.isEmpty()) {
112 if (keyValuePairs.find(key) != keyValuePairs.end()) 111 if (keyValuePairs.find(key) != keyValuePairs.end())
113 DLOG(ERROR) << "Key duplicate found in MIME header. Key is '" << key << "', previous value replaced."; 112 DVLOG(1) << "Key duplicate found in MIME header. Key is '" << ke y << "', previous value replaced.";
114 keyValuePairs.add(key, value.toString().stripWhiteSpace()); 113 keyValuePairs.add(key, value.toString().stripWhiteSpace());
115 key = String(); 114 key = String();
116 value.clear(); 115 value.clear();
117 } 116 }
118 size_t semiColonIndex = line.find(':'); 117 size_t semiColonIndex = line.find(':');
119 if (semiColonIndex == kNotFound) { 118 if (semiColonIndex == kNotFound) {
120 // This is not a key value pair, ignore. 119 // This is not a key value pair, ignore.
121 continue; 120 continue;
122 } 121 }
123 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); 122 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace();
(...skipping 12 matching lines...) Expand all
136 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t ype"); 135 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t ype");
137 if (mimeParametersIterator != keyValuePairs.end()) { 136 if (mimeParametersIterator != keyValuePairs.end()) {
138 ParsedContentType parsedContentType(mimeParametersIterator->value); 137 ParsedContentType parsedContentType(mimeParametersIterator->value);
139 mimeHeader->m_contentType = parsedContentType.mimeType(); 138 mimeHeader->m_contentType = parsedContentType.mimeType();
140 if (!mimeHeader->isMultipart()) { 139 if (!mimeHeader->isMultipart()) {
141 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace( ); 140 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace( );
142 } else { 141 } else {
143 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam e("type"); 142 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam e("type");
144 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo rName("boundary"); 143 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo rName("boundary");
145 if (mimeHeader->m_endOfPartBoundary.isNull()) { 144 if (mimeHeader->m_endOfPartBoundary.isNull()) {
146 DLOG(ERROR) << "No boundary found in multipart MIME header."; 145 DVLOG(1) << "No boundary found in multipart MIME header.";
147 return nullptr; 146 return nullptr;
148 } 147 }
149 mimeHeader->m_endOfPartBoundary.insert("--", 0); 148 mimeHeader->m_endOfPartBoundary.insert("--", 0);
150 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar y; 149 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar y;
151 mimeHeader->m_endOfDocumentBoundary.append("--"); 150 mimeHeader->m_endOfDocumentBoundary.append("--");
152 } 151 }
153 } 152 }
154 153
155 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); 154 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding");
156 if (mimeParametersIterator != keyValuePairs.end()) 155 if (mimeParametersIterator != keyValuePairs.end())
(...skipping 17 matching lines...) Expand all
174 if (encoding == "base64") 173 if (encoding == "base64")
175 return Base64; 174 return Base64;
176 if (encoding == "quoted-printable") 175 if (encoding == "quoted-printable")
177 return QuotedPrintable; 176 return QuotedPrintable;
178 if (encoding == "8bit") 177 if (encoding == "8bit")
179 return EightBit; 178 return EightBit;
180 if (encoding == "7bit") 179 if (encoding == "7bit")
181 return SevenBit; 180 return SevenBit;
182 if (encoding == "binary") 181 if (encoding == "binary")
183 return Binary; 182 return Binary;
184 DLOG(ERROR) << "Unknown encoding '" << text << "' found in MIME header."; 183 DVLOG(1) << "Unknown encoding '" << text << "' found in MIME header.";
185 return Unknown; 184 return Unknown;
186 } 185 }
187 186
188 MIMEHeader::MIMEHeader() 187 MIMEHeader::MIMEHeader()
189 : m_contentTransferEncoding(Unknown) 188 : m_contentTransferEncoding(Unknown)
190 { 189 {
191 } 190 }
192 191
193 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, con st String& boundary) 192 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, con st String& boundary)
194 { 193 {
(...skipping 15 matching lines...) Expand all
210 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader); 209 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader);
211 HeapVector<Member<ArchiveResource>> resources; 210 HeapVector<Member<ArchiveResource>> resources;
212 if (!parseArchiveWithHeader(header, resources)) 211 if (!parseArchiveWithHeader(header, resources))
213 resources.clear(); 212 resources.clear();
214 return resources; 213 return resources;
215 } 214 }
216 215
217 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<A rchiveResource>>& resources) 216 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<A rchiveResource>>& resources)
218 { 217 {
219 if (!header) { 218 if (!header) {
220 DLOG(ERROR) << "Failed to parse MHTML part: no header."; 219 DVLOG(1) << "Failed to parse MHTML part: no header.";
221 return false; 220 return false;
222 } 221 }
223 222
224 if (!header->isMultipart()) { 223 if (!header->isMultipart()) {
225 // With IE a page with no resource is not multi-part. 224 // With IE a page with no resource is not multi-part.
226 bool endOfArchiveReached = false; 225 bool endOfArchiveReached = false;
227 ArchiveResource* resource = parseNextPart(*header, String(), String(), e ndOfArchiveReached); 226 ArchiveResource* resource = parseNextPart(*header, String(), String(), e ndOfArchiveReached);
228 if (!resource) 227 if (!resource)
229 return false; 228 return false;
230 resources.append(resource); 229 resources.append(resource);
231 return true; 230 return true;
232 } 231 }
233 232
234 // Skip the message content (it's a generic browser specific message). 233 // Skip the message content (it's a generic browser specific message).
235 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); 234 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());
236 235
237 bool endOfArchive = false; 236 bool endOfArchive = false;
238 while (!endOfArchive) { 237 while (!endOfArchive) {
239 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); 238 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader);
240 if (!resourceHeader) { 239 if (!resourceHeader) {
241 DLOG(ERROR) << "Failed to parse MHTML, invalid MIME header."; 240 DVLOG(1) << "Failed to parse MHTML, invalid MIME header.";
242 return false; 241 return false;
243 } 242 }
244 if (resourceHeader->contentType() == "multipart/alternative") { 243 if (resourceHeader->contentType() == "multipart/alternative") {
245 // Ignore IE nesting which makes little sense (IE seems to nest only some of the frames). 244 // Ignore IE nesting which makes little sense (IE seems to nest only some of the frames).
246 if (!parseArchiveWithHeader(resourceHeader, resources)) { 245 if (!parseArchiveWithHeader(resourceHeader, resources)) {
247 DLOG(ERROR) << "Failed to parse MHTML subframe."; 246 DVLOG(1) << "Failed to parse MHTML subframe.";
248 return false; 247 return false;
249 } 248 }
250 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he ader->endOfPartBoundary()); 249 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary( ));
251 ASSERT_UNUSED(endOfPartReached, endOfPartReached);
252 continue; 250 continue;
253 } 251 }
254 252
255 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOf PartBoundary(), header->endOfDocumentBoundary(), endOfArchive); 253 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOf PartBoundary(), header->endOfDocumentBoundary(), endOfArchive);
256 if (!resource) { 254 if (!resource) {
257 DLOG(ERROR) << "Failed to parse MHTML part."; 255 DVLOG(1) << "Failed to parse MHTML part.";
258 return false; 256 return false;
259 } 257 }
260 resources.append(resource); 258 resources.append(resource);
261 } 259 }
262 return true; 260 return true;
263 } 261 }
264 262
265 263
266 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchi veReached) 264 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchi veReached)
267 { 265 {
268 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); 266 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());
269 267
270 // If no content transfer encoding is specified, default to binary encoding. 268 // If no content transfer encoding is specified, default to binary encoding.
271 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc oding(); 269 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc oding();
272 if (contentTransferEncoding == MIMEHeader::Unknown) 270 if (contentTransferEncoding == MIMEHeader::Unknown)
273 contentTransferEncoding = MIMEHeader::Binary; 271 contentTransferEncoding = MIMEHeader::Binary;
274 272
275 RefPtr<SharedBuffer> content = SharedBuffer::create(); 273 RefPtr<SharedBuffer> content = SharedBuffer::create();
276 const bool checkBoundary = !endOfPartBoundary.isEmpty(); 274 const bool checkBoundary = !endOfPartBoundary.isEmpty();
277 bool endOfPartReached = false; 275 bool endOfPartReached = false;
278 if (contentTransferEncoding == MIMEHeader::Binary) { 276 if (contentTransferEncoding == MIMEHeader::Binary) {
279 if (!checkBoundary) { 277 if (!checkBoundary) {
280 DLOG(ERROR) << "Binary contents requires end of part"; 278 DVLOG(1) << "Binary contents requires end of part";
281 return nullptr; 279 return nullptr;
282 } 280 }
283 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); 281 m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
284 Vector<char> part; 282 Vector<char> part;
285 if (!m_lineReader.nextChunk(part)) { 283 if (!m_lineReader.nextChunk(part)) {
286 DLOG(ERROR) << "Binary contents requires end of part"; 284 DVLOG(1) << "Binary contents requires end of part";
287 return nullptr; 285 return nullptr;
288 } 286 }
289 content->append(part); 287 content->append(part);
290 m_lineReader.setSeparator("\r\n"); 288 m_lineReader.setSeparator("\r\n");
291 Vector<char> nextChars; 289 Vector<char> nextChars;
292 if (m_lineReader.peek(nextChars, 2) != 2) { 290 if (m_lineReader.peek(nextChars, 2) != 2) {
293 DLOG(ERROR) << "Invalid seperator."; 291 DVLOG(1) << "Invalid seperator.";
294 return nullptr; 292 return nullptr;
295 } 293 }
296 endOfPartReached = true; 294 endOfPartReached = true;
297 ASSERT(nextChars.size() == 2); 295 ASSERT(nextChars.size() == 2);
298 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); 296 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
299 if (!endOfArchiveReached) { 297 if (!endOfArchiveReached) {
300 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback() ; 298 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback() ;
301 if (!line.isEmpty()) { 299 if (!line.isEmpty()) {
302 DLOG(ERROR) << "No CRLF at end of binary section."; 300 DVLOG(1) << "No CRLF at end of binary section.";
303 return nullptr; 301 return nullptr;
304 } 302 }
305 } 303 }
306 } else { 304 } else {
307 String line; 305 String line;
308 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()). isNull()) { 306 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()). isNull()) {
309 endOfArchiveReached = (line == endOfDocumentBoundary); 307 endOfArchiveReached = (line == endOfDocumentBoundary);
310 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach ed)) { 308 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach ed)) {
311 endOfPartReached = true; 309 endOfPartReached = true;
312 break; 310 break;
313 } 311 }
314 // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'. 312 // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
315 content->append(line.utf8().data(), line.length()); 313 content->append(line.utf8().data(), line.length());
316 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { 314 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {
317 // The line reader removes the \r\n, but we need them for the co ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines . 315 // The line reader removes the \r\n, but we need them for the co ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines .
318 content->append("\r\n", 2u); 316 content->append("\r\n", 2u);
319 } 317 }
320 } 318 }
321 } 319 }
322 if (!endOfPartReached && checkBoundary) { 320 if (!endOfPartReached && checkBoundary) {
323 DLOG(ERROR) << "No bounday found for MHTML part."; 321 DVLOG(1) << "No boundary found for MHTML part.";
324 return nullptr; 322 return nullptr;
325 } 323 }
326 324
327 Vector<char> data; 325 Vector<char> data;
328 switch (contentTransferEncoding) { 326 switch (contentTransferEncoding) {
329 case MIMEHeader::Base64: 327 case MIMEHeader::Base64:
330 if (!base64Decode(content->data(), content->size(), data)) { 328 if (!base64Decode(content->data(), content->size(), data)) {
331 DLOG(ERROR) << "Invalid base64 content for MHTML part."; 329 DVLOG(1) << "Invalid base64 content for MHTML part.";
332 return nullptr; 330 return nullptr;
333 } 331 }
334 break; 332 break;
335 case MIMEHeader::QuotedPrintable: 333 case MIMEHeader::QuotedPrintable:
336 quotedPrintableDecode(content->data(), content->size(), data); 334 quotedPrintableDecode(content->data(), content->size(), data);
337 break; 335 break;
338 case MIMEHeader::EightBit: 336 case MIMEHeader::EightBit:
339 case MIMEHeader::SevenBit: 337 case MIMEHeader::SevenBit:
340 case MIMEHeader::Binary: 338 case MIMEHeader::Binary:
341 data.append(content->data(), content->size()); 339 data.append(content->data(), content->size());
342 break; 340 break;
343 default: 341 default:
344 DLOG(ERROR) << "Invalid encoding for MHTML part."; 342 DVLOG(1) << "Invalid encoding for MHTML part.";
345 return nullptr; 343 return nullptr;
346 } 344 }
347 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); 345 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
348 // FIXME: the URL in the MIME header could be relative, we should resolve it if it is. 346 // FIXME: the URL in the MIME header could be relative, we should resolve it if it is.
349 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf c2557#section-5 347 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf c2557#section-5
350 // IE and Firefox (UNMht) seem to generate only absolute URLs. 348 // IE and Firefox (UNMht) seem to generate only absolute URLs.
351 KURL location = KURL(KURL(), mimeHeader.contentLocation()); 349 KURL location = KURL(KURL(), mimeHeader.contentLocation());
352 return ArchiveResource::create( 350 return ArchiveResource::create(
353 contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader .contentType()), AtomicString(mimeHeader.charset())); 351 contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader .contentType()), AtomicString(mimeHeader.charset()));
354 } 352 }
(...skipping 14 matching lines...) Expand all
369 if (!contentID.startsWith('<') || !contentID.endsWith('>')) 367 if (!contentID.startsWith('<') || !contentID.endsWith('>'))
370 return KURL(); 368 return KURL();
371 369
372 StringBuilder uriBuilder; 370 StringBuilder uriBuilder;
373 uriBuilder.append("cid:"); 371 uriBuilder.append("cid:");
374 uriBuilder.append(contentID, 1, contentID.length() - 2); 372 uriBuilder.append(contentID, 1, contentID.length() - 2);
375 return KURL(KURL(), uriBuilder.toString()); 373 return KURL(KURL(), uriBuilder.toString());
376 } 374 }
377 375
378 } // namespace blink 376 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698