OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
131 if (mimeParametersIterator != keyValuePairs.end()) { | 131 if (mimeParametersIterator != keyValuePairs.end()) { |
132 ParsedContentType parsedContentType(mimeParametersIterator->value); | 132 ParsedContentType parsedContentType(mimeParametersIterator->value); |
133 mimeHeader->m_contentType = parsedContentType.mimeType(); | 133 mimeHeader->m_contentType = parsedContentType.mimeType(); |
134 if (!mimeHeader->isMultipart()) { | 134 if (!mimeHeader->isMultipart()) { |
135 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); | 135 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); |
136 } else { | 136 } else { |
137 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); | 137 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); |
138 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); | 138 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); |
139 if (mimeHeader->m_endOfPartBoundary.isNull()) { | 139 if (mimeHeader->m_endOfPartBoundary.isNull()) { |
140 WTF_LOG_ERROR("No boundary found in multipart MIME header."); | 140 WTF_LOG_ERROR("No boundary found in multipart MIME header."); |
141 return 0; | 141 return nullptr; |
142 } | 142 } |
143 mimeHeader->m_endOfPartBoundary.insert("--", 0); | 143 mimeHeader->m_endOfPartBoundary.insert("--", 0); |
144 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar
y; | 144 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar
y; |
145 mimeHeader->m_endOfDocumentBoundary.append("--"); | 145 mimeHeader->m_endOfDocumentBoundary.append("--"); |
146 } | 146 } |
147 } | 147 } |
148 | 148 |
149 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); | 149 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); |
150 if (mimeParametersIterator != keyValuePairs.end()) | 150 if (mimeParametersIterator != keyValuePairs.end()) |
151 mimeHeader->m_contentTransferEncoding = parseContentTransferEncoding(mim
eParametersIterator->value); | 151 mimeHeader->m_contentTransferEncoding = parseContentTransferEncoding(mim
eParametersIterator->value); |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive() | 197 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive() |
198 { | 198 { |
199 RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); | 199 RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); |
200 return parseArchiveWithHeader(header.get()); | 200 return parseArchiveWithHeader(header.get()); |
201 } | 201 } |
202 | 202 |
203 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) | 203 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) |
204 { | 204 { |
205 if (!header) { | 205 if (!header) { |
206 WTF_LOG_ERROR("Failed to parse MHTML part: no header."); | 206 WTF_LOG_ERROR("Failed to parse MHTML part: no header."); |
207 return 0; | 207 return nullptr; |
208 } | 208 } |
209 | 209 |
210 RefPtr<MHTMLArchive> archive = MHTMLArchive::create(); | 210 RefPtr<MHTMLArchive> archive = MHTMLArchive::create(); |
211 if (!header->isMultipart()) { | 211 if (!header->isMultipart()) { |
212 // With IE a page with no resource is not multi-part. | 212 // With IE a page with no resource is not multi-part. |
213 bool endOfArchiveReached = false; | 213 bool endOfArchiveReached = false; |
214 RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), Stri
ng(), endOfArchiveReached); | 214 RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), Stri
ng(), endOfArchiveReached); |
215 if (!resource) | 215 if (!resource) |
216 return 0; | 216 return nullptr; |
217 archive->setMainResource(resource); | 217 archive->setMainResource(resource); |
218 return archive; | 218 return archive; |
219 } | 219 } |
220 | 220 |
221 // Skip the message content (it's a generic browser specific message). | 221 // Skip the message content (it's a generic browser specific message). |
222 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 222 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
223 | 223 |
224 bool endOfArchive = false; | 224 bool endOfArchive = false; |
225 while (!endOfArchive) { | 225 while (!endOfArchive) { |
226 RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReade
r); | 226 RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReade
r); |
227 if (!resourceHeader) { | 227 if (!resourceHeader) { |
228 WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header."); | 228 WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header."); |
229 return 0; | 229 return nullptr; |
230 } | 230 } |
231 if (resourceHeader->contentType() == "multipart/alternative") { | 231 if (resourceHeader->contentType() == "multipart/alternative") { |
232 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). | 232 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). |
233 RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resour
ceHeader.get()); | 233 RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resour
ceHeader.get()); |
234 if (!subframeArchive) { | 234 if (!subframeArchive) { |
235 WTF_LOG_ERROR("Failed to parse MHTML subframe."); | 235 WTF_LOG_ERROR("Failed to parse MHTML subframe."); |
236 return 0; | 236 return nullptr; |
237 } | 237 } |
238 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); | 238 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); |
239 ASSERT_UNUSED(endOfPartReached, endOfPartReached); | 239 ASSERT_UNUSED(endOfPartReached, endOfPartReached); |
240 // The top-frame is the first frame found, regardless of the nesting
level. | 240 // The top-frame is the first frame found, regardless of the nesting
level. |
241 if (subframeArchive->mainResource()) | 241 if (subframeArchive->mainResource()) |
242 addResourceToArchive(subframeArchive->mainResource(), archive.ge
t()); | 242 addResourceToArchive(subframeArchive->mainResource(), archive.ge
t()); |
243 archive->addSubframeArchive(subframeArchive); | 243 archive->addSubframeArchive(subframeArchive); |
244 continue; | 244 continue; |
245 } | 245 } |
246 | 246 |
247 RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header
->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); | 247 RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header
->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); |
248 if (!resource) { | 248 if (!resource) { |
249 WTF_LOG_ERROR("Failed to parse MHTML part."); | 249 WTF_LOG_ERROR("Failed to parse MHTML part."); |
250 return 0; | 250 return nullptr; |
251 } | 251 } |
252 addResourceToArchive(resource.get(), archive.get()); | 252 addResourceToArchive(resource.get(), archive.get()); |
253 } | 253 } |
254 | 254 |
255 return archive.release(); | 255 return archive.release(); |
256 } | 256 } |
257 | 257 |
258 void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
archive) | 258 void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
archive) |
259 { | 259 { |
260 const AtomicString& mimeType = resource->mimeType(); | 260 const AtomicString& mimeType = resource->mimeType(); |
(...skipping 22 matching lines...) Expand all Loading... |
283 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); | 283 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); |
284 if (contentTransferEncoding == MIMEHeader::Unknown) | 284 if (contentTransferEncoding == MIMEHeader::Unknown) |
285 contentTransferEncoding = MIMEHeader::Binary; | 285 contentTransferEncoding = MIMEHeader::Binary; |
286 | 286 |
287 RefPtr<SharedBuffer> content = SharedBuffer::create(); | 287 RefPtr<SharedBuffer> content = SharedBuffer::create(); |
288 const bool checkBoundary = !endOfPartBoundary.isEmpty(); | 288 const bool checkBoundary = !endOfPartBoundary.isEmpty(); |
289 bool endOfPartReached = false; | 289 bool endOfPartReached = false; |
290 if (contentTransferEncoding == MIMEHeader::Binary) { | 290 if (contentTransferEncoding == MIMEHeader::Binary) { |
291 if (!checkBoundary) { | 291 if (!checkBoundary) { |
292 WTF_LOG_ERROR("Binary contents requires end of part"); | 292 WTF_LOG_ERROR("Binary contents requires end of part"); |
293 return 0; | 293 return nullptr; |
294 } | 294 } |
295 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); | 295 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); |
296 Vector<char> part; | 296 Vector<char> part; |
297 if (!m_lineReader.nextChunk(part)) { | 297 if (!m_lineReader.nextChunk(part)) { |
298 WTF_LOG_ERROR("Binary contents requires end of part"); | 298 WTF_LOG_ERROR("Binary contents requires end of part"); |
299 return 0; | 299 return nullptr; |
300 } | 300 } |
301 content->append(part); | 301 content->append(part); |
302 m_lineReader.setSeparator("\r\n"); | 302 m_lineReader.setSeparator("\r\n"); |
303 Vector<char> nextChars; | 303 Vector<char> nextChars; |
304 if (m_lineReader.peek(nextChars, 2) != 2) { | 304 if (m_lineReader.peek(nextChars, 2) != 2) { |
305 WTF_LOG_ERROR("Invalid seperator."); | 305 WTF_LOG_ERROR("Invalid seperator."); |
306 return 0; | 306 return nullptr; |
307 } | 307 } |
308 endOfPartReached = true; | 308 endOfPartReached = true; |
309 ASSERT(nextChars.size() == 2); | 309 ASSERT(nextChars.size() == 2); |
310 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); | 310 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); |
311 if (!endOfArchiveReached) { | 311 if (!endOfArchiveReached) { |
312 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()
; | 312 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()
; |
313 if (!line.isEmpty()) { | 313 if (!line.isEmpty()) { |
314 WTF_LOG_ERROR("No CRLF at end of binary section."); | 314 WTF_LOG_ERROR("No CRLF at end of binary section."); |
315 return 0; | 315 return nullptr; |
316 } | 316 } |
317 } | 317 } |
318 } else { | 318 } else { |
319 String line; | 319 String line; |
320 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).
isNull()) { | 320 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).
isNull()) { |
321 endOfArchiveReached = (line == endOfDocumentBoundary); | 321 endOfArchiveReached = (line == endOfDocumentBoundary); |
322 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach
ed)) { | 322 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach
ed)) { |
323 endOfPartReached = true; | 323 endOfPartReached = true; |
324 break; | 324 break; |
325 } | 325 } |
326 // Note that we use line.utf8() and not line.ascii() as ascii turns
special characters (such as tab, line-feed...) into '?'. | 326 // Note that we use line.utf8() and not line.ascii() as ascii turns
special characters (such as tab, line-feed...) into '?'. |
327 content->append(line.utf8().data(), line.length()); | 327 content->append(line.utf8().data(), line.length()); |
328 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { | 328 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { |
329 // The line reader removes the \r\n, but we need them for the co
ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines
. | 329 // The line reader removes the \r\n, but we need them for the co
ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines
. |
330 content->append("\r\n", 2); | 330 content->append("\r\n", 2); |
331 } | 331 } |
332 } | 332 } |
333 } | 333 } |
334 if (!endOfPartReached && checkBoundary) { | 334 if (!endOfPartReached && checkBoundary) { |
335 WTF_LOG_ERROR("No bounday found for MHTML part."); | 335 WTF_LOG_ERROR("No bounday found for MHTML part."); |
336 return 0; | 336 return nullptr; |
337 } | 337 } |
338 | 338 |
339 Vector<char> data; | 339 Vector<char> data; |
340 switch (contentTransferEncoding) { | 340 switch (contentTransferEncoding) { |
341 case MIMEHeader::Base64: | 341 case MIMEHeader::Base64: |
342 if (!base64Decode(content->data(), content->size(), data)) { | 342 if (!base64Decode(content->data(), content->size(), data)) { |
343 WTF_LOG_ERROR("Invalid base64 content for MHTML part."); | 343 WTF_LOG_ERROR("Invalid base64 content for MHTML part."); |
344 return 0; | 344 return nullptr; |
345 } | 345 } |
346 break; | 346 break; |
347 case MIMEHeader::QuotedPrintable: | 347 case MIMEHeader::QuotedPrintable: |
348 quotedPrintableDecode(content->data(), content->size(), data); | 348 quotedPrintableDecode(content->data(), content->size(), data); |
349 break; | 349 break; |
350 case MIMEHeader::EightBit: | 350 case MIMEHeader::EightBit: |
351 case MIMEHeader::SevenBit: | 351 case MIMEHeader::SevenBit: |
352 case MIMEHeader::Binary: | 352 case MIMEHeader::Binary: |
353 data.append(content->data(), content->size()); | 353 data.append(content->data(), content->size()); |
354 break; | 354 break; |
355 default: | 355 default: |
356 WTF_LOG_ERROR("Invalid encoding for MHTML part."); | 356 WTF_LOG_ERROR("Invalid encoding for MHTML part."); |
357 return 0; | 357 return nullptr; |
358 } | 358 } |
359 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); | 359 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); |
360 // FIXME: the URL in the MIME header could be relative, we should resolve it
if it is. | 360 // FIXME: the URL in the MIME header could be relative, we should resolve it
if it is. |
361 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf
c2557#section-5 | 361 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf
c2557#section-5 |
362 // IE and Firefox (UNMht) seem to generate only absolute URLs. | 362 // IE and Firefox (UNMht) seem to generate only absolute URLs. |
363 KURL location = KURL(KURL(), mimeHeader.contentLocation()); | 363 KURL location = KURL(KURL(), mimeHeader.contentLocation()); |
364 return ArchiveResource::create(contentBuffer, location, AtomicString(mimeHea
der.contentType()), AtomicString(mimeHeader.charset()), String()); | 364 return ArchiveResource::create(contentBuffer, location, AtomicString(mimeHea
der.contentType()), AtomicString(mimeHeader.charset()), String()); |
365 } | 365 } |
366 | 366 |
367 size_t MHTMLParser::frameCount() const | 367 size_t MHTMLParser::frameCount() const |
(...skipping 10 matching lines...) Expand all Loading... |
378 { | 378 { |
379 return m_resources.size(); | 379 return m_resources.size(); |
380 } | 380 } |
381 | 381 |
382 ArchiveResource* MHTMLParser::subResourceAt(size_t index) const | 382 ArchiveResource* MHTMLParser::subResourceAt(size_t index) const |
383 { | 383 { |
384 return m_resources[index].get(); | 384 return m_resources[index].get(); |
385 } | 385 } |
386 | 386 |
387 } | 387 } |
OLD | NEW |