| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 131 if (mimeParametersIterator != keyValuePairs.end()) { | 131 if (mimeParametersIterator != keyValuePairs.end()) { |
| 132 ParsedContentType parsedContentType(mimeParametersIterator->value); | 132 ParsedContentType parsedContentType(mimeParametersIterator->value); |
| 133 mimeHeader->m_contentType = parsedContentType.mimeType(); | 133 mimeHeader->m_contentType = parsedContentType.mimeType(); |
| 134 if (!mimeHeader->isMultipart()) { | 134 if (!mimeHeader->isMultipart()) { |
| 135 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); | 135 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); |
| 136 } else { | 136 } else { |
| 137 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); | 137 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); |
| 138 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); | 138 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); |
| 139 if (mimeHeader->m_endOfPartBoundary.isNull()) { | 139 if (mimeHeader->m_endOfPartBoundary.isNull()) { |
| 140 WTF_LOG_ERROR("No boundary found in multipart MIME header."); | 140 WTF_LOG_ERROR("No boundary found in multipart MIME header."); |
| 141 return 0; | 141 return nullptr; |
| 142 } | 142 } |
| 143 mimeHeader->m_endOfPartBoundary.insert("--", 0); | 143 mimeHeader->m_endOfPartBoundary.insert("--", 0); |
| 144 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar
y; | 144 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar
y; |
| 145 mimeHeader->m_endOfDocumentBoundary.append("--"); | 145 mimeHeader->m_endOfDocumentBoundary.append("--"); |
| 146 } | 146 } |
| 147 } | 147 } |
| 148 | 148 |
| 149 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); | 149 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); |
| 150 if (mimeParametersIterator != keyValuePairs.end()) | 150 if (mimeParametersIterator != keyValuePairs.end()) |
| 151 mimeHeader->m_contentTransferEncoding = parseContentTransferEncoding(mim
eParametersIterator->value); | 151 mimeHeader->m_contentTransferEncoding = parseContentTransferEncoding(mim
eParametersIterator->value); |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 197 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive() | 197 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive() |
| 198 { | 198 { |
| 199 RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); | 199 RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); |
| 200 return parseArchiveWithHeader(header.get()); | 200 return parseArchiveWithHeader(header.get()); |
| 201 } | 201 } |
| 202 | 202 |
| 203 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) | 203 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) |
| 204 { | 204 { |
| 205 if (!header) { | 205 if (!header) { |
| 206 WTF_LOG_ERROR("Failed to parse MHTML part: no header."); | 206 WTF_LOG_ERROR("Failed to parse MHTML part: no header."); |
| 207 return 0; | 207 return nullptr; |
| 208 } | 208 } |
| 209 | 209 |
| 210 RefPtr<MHTMLArchive> archive = MHTMLArchive::create(); | 210 RefPtr<MHTMLArchive> archive = MHTMLArchive::create(); |
| 211 if (!header->isMultipart()) { | 211 if (!header->isMultipart()) { |
| 212 // With IE a page with no resource is not multi-part. | 212 // With IE a page with no resource is not multi-part. |
| 213 bool endOfArchiveReached = false; | 213 bool endOfArchiveReached = false; |
| 214 RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), Stri
ng(), endOfArchiveReached); | 214 RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), Stri
ng(), endOfArchiveReached); |
| 215 if (!resource) | 215 if (!resource) |
| 216 return 0; | 216 return nullptr; |
| 217 archive->setMainResource(resource); | 217 archive->setMainResource(resource); |
| 218 return archive; | 218 return archive; |
| 219 } | 219 } |
| 220 | 220 |
| 221 // Skip the message content (it's a generic browser specific message). | 221 // Skip the message content (it's a generic browser specific message). |
| 222 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 222 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
| 223 | 223 |
| 224 bool endOfArchive = false; | 224 bool endOfArchive = false; |
| 225 while (!endOfArchive) { | 225 while (!endOfArchive) { |
| 226 RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReade
r); | 226 RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReade
r); |
| 227 if (!resourceHeader) { | 227 if (!resourceHeader) { |
| 228 WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header."); | 228 WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header."); |
| 229 return 0; | 229 return nullptr; |
| 230 } | 230 } |
| 231 if (resourceHeader->contentType() == "multipart/alternative") { | 231 if (resourceHeader->contentType() == "multipart/alternative") { |
| 232 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). | 232 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). |
| 233 RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resour
ceHeader.get()); | 233 RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resour
ceHeader.get()); |
| 234 if (!subframeArchive) { | 234 if (!subframeArchive) { |
| 235 WTF_LOG_ERROR("Failed to parse MHTML subframe."); | 235 WTF_LOG_ERROR("Failed to parse MHTML subframe."); |
| 236 return 0; | 236 return nullptr; |
| 237 } | 237 } |
| 238 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); | 238 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); |
| 239 ASSERT_UNUSED(endOfPartReached, endOfPartReached); | 239 ASSERT_UNUSED(endOfPartReached, endOfPartReached); |
| 240 // The top-frame is the first frame found, regardless of the nesting
level. | 240 // The top-frame is the first frame found, regardless of the nesting
level. |
| 241 if (subframeArchive->mainResource()) | 241 if (subframeArchive->mainResource()) |
| 242 addResourceToArchive(subframeArchive->mainResource(), archive.ge
t()); | 242 addResourceToArchive(subframeArchive->mainResource(), archive.ge
t()); |
| 243 archive->addSubframeArchive(subframeArchive); | 243 archive->addSubframeArchive(subframeArchive); |
| 244 continue; | 244 continue; |
| 245 } | 245 } |
| 246 | 246 |
| 247 RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header
->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); | 247 RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header
->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); |
| 248 if (!resource) { | 248 if (!resource) { |
| 249 WTF_LOG_ERROR("Failed to parse MHTML part."); | 249 WTF_LOG_ERROR("Failed to parse MHTML part."); |
| 250 return 0; | 250 return nullptr; |
| 251 } | 251 } |
| 252 addResourceToArchive(resource.get(), archive.get()); | 252 addResourceToArchive(resource.get(), archive.get()); |
| 253 } | 253 } |
| 254 | 254 |
| 255 return archive.release(); | 255 return archive.release(); |
| 256 } | 256 } |
| 257 | 257 |
| 258 void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
archive) | 258 void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
archive) |
| 259 { | 259 { |
| 260 const AtomicString& mimeType = resource->mimeType(); | 260 const AtomicString& mimeType = resource->mimeType(); |
| (...skipping 22 matching lines...) Expand all Loading... |
| 283 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); | 283 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); |
| 284 if (contentTransferEncoding == MIMEHeader::Unknown) | 284 if (contentTransferEncoding == MIMEHeader::Unknown) |
| 285 contentTransferEncoding = MIMEHeader::Binary; | 285 contentTransferEncoding = MIMEHeader::Binary; |
| 286 | 286 |
| 287 RefPtr<SharedBuffer> content = SharedBuffer::create(); | 287 RefPtr<SharedBuffer> content = SharedBuffer::create(); |
| 288 const bool checkBoundary = !endOfPartBoundary.isEmpty(); | 288 const bool checkBoundary = !endOfPartBoundary.isEmpty(); |
| 289 bool endOfPartReached = false; | 289 bool endOfPartReached = false; |
| 290 if (contentTransferEncoding == MIMEHeader::Binary) { | 290 if (contentTransferEncoding == MIMEHeader::Binary) { |
| 291 if (!checkBoundary) { | 291 if (!checkBoundary) { |
| 292 WTF_LOG_ERROR("Binary contents requires end of part"); | 292 WTF_LOG_ERROR("Binary contents requires end of part"); |
| 293 return 0; | 293 return nullptr; |
| 294 } | 294 } |
| 295 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); | 295 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); |
| 296 Vector<char> part; | 296 Vector<char> part; |
| 297 if (!m_lineReader.nextChunk(part)) { | 297 if (!m_lineReader.nextChunk(part)) { |
| 298 WTF_LOG_ERROR("Binary contents requires end of part"); | 298 WTF_LOG_ERROR("Binary contents requires end of part"); |
| 299 return 0; | 299 return nullptr; |
| 300 } | 300 } |
| 301 content->append(part); | 301 content->append(part); |
| 302 m_lineReader.setSeparator("\r\n"); | 302 m_lineReader.setSeparator("\r\n"); |
| 303 Vector<char> nextChars; | 303 Vector<char> nextChars; |
| 304 if (m_lineReader.peek(nextChars, 2) != 2) { | 304 if (m_lineReader.peek(nextChars, 2) != 2) { |
| 305 WTF_LOG_ERROR("Invalid seperator."); | 305 WTF_LOG_ERROR("Invalid seperator."); |
| 306 return 0; | 306 return nullptr; |
| 307 } | 307 } |
| 308 endOfPartReached = true; | 308 endOfPartReached = true; |
| 309 ASSERT(nextChars.size() == 2); | 309 ASSERT(nextChars.size() == 2); |
| 310 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); | 310 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); |
| 311 if (!endOfArchiveReached) { | 311 if (!endOfArchiveReached) { |
| 312 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()
; | 312 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()
; |
| 313 if (!line.isEmpty()) { | 313 if (!line.isEmpty()) { |
| 314 WTF_LOG_ERROR("No CRLF at end of binary section."); | 314 WTF_LOG_ERROR("No CRLF at end of binary section."); |
| 315 return 0; | 315 return nullptr; |
| 316 } | 316 } |
| 317 } | 317 } |
| 318 } else { | 318 } else { |
| 319 String line; | 319 String line; |
| 320 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).
isNull()) { | 320 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).
isNull()) { |
| 321 endOfArchiveReached = (line == endOfDocumentBoundary); | 321 endOfArchiveReached = (line == endOfDocumentBoundary); |
| 322 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach
ed)) { | 322 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach
ed)) { |
| 323 endOfPartReached = true; | 323 endOfPartReached = true; |
| 324 break; | 324 break; |
| 325 } | 325 } |
| 326 // Note that we use line.utf8() and not line.ascii() as ascii turns
special characters (such as tab, line-feed...) into '?'. | 326 // Note that we use line.utf8() and not line.ascii() as ascii turns
special characters (such as tab, line-feed...) into '?'. |
| 327 content->append(line.utf8().data(), line.length()); | 327 content->append(line.utf8().data(), line.length()); |
| 328 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { | 328 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { |
| 329 // The line reader removes the \r\n, but we need them for the co
ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines
. | 329 // The line reader removes the \r\n, but we need them for the co
ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines
. |
| 330 content->append("\r\n", 2); | 330 content->append("\r\n", 2); |
| 331 } | 331 } |
| 332 } | 332 } |
| 333 } | 333 } |
| 334 if (!endOfPartReached && checkBoundary) { | 334 if (!endOfPartReached && checkBoundary) { |
| 335 WTF_LOG_ERROR("No bounday found for MHTML part."); | 335 WTF_LOG_ERROR("No bounday found for MHTML part."); |
| 336 return 0; | 336 return nullptr; |
| 337 } | 337 } |
| 338 | 338 |
| 339 Vector<char> data; | 339 Vector<char> data; |
| 340 switch (contentTransferEncoding) { | 340 switch (contentTransferEncoding) { |
| 341 case MIMEHeader::Base64: | 341 case MIMEHeader::Base64: |
| 342 if (!base64Decode(content->data(), content->size(), data)) { | 342 if (!base64Decode(content->data(), content->size(), data)) { |
| 343 WTF_LOG_ERROR("Invalid base64 content for MHTML part."); | 343 WTF_LOG_ERROR("Invalid base64 content for MHTML part."); |
| 344 return 0; | 344 return nullptr; |
| 345 } | 345 } |
| 346 break; | 346 break; |
| 347 case MIMEHeader::QuotedPrintable: | 347 case MIMEHeader::QuotedPrintable: |
| 348 quotedPrintableDecode(content->data(), content->size(), data); | 348 quotedPrintableDecode(content->data(), content->size(), data); |
| 349 break; | 349 break; |
| 350 case MIMEHeader::EightBit: | 350 case MIMEHeader::EightBit: |
| 351 case MIMEHeader::SevenBit: | 351 case MIMEHeader::SevenBit: |
| 352 case MIMEHeader::Binary: | 352 case MIMEHeader::Binary: |
| 353 data.append(content->data(), content->size()); | 353 data.append(content->data(), content->size()); |
| 354 break; | 354 break; |
| 355 default: | 355 default: |
| 356 WTF_LOG_ERROR("Invalid encoding for MHTML part."); | 356 WTF_LOG_ERROR("Invalid encoding for MHTML part."); |
| 357 return 0; | 357 return nullptr; |
| 358 } | 358 } |
| 359 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); | 359 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); |
| 360 // FIXME: the URL in the MIME header could be relative, we should resolve it
if it is. | 360 // FIXME: the URL in the MIME header could be relative, we should resolve it
if it is. |
| 361 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf
c2557#section-5 | 361 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf
c2557#section-5 |
| 362 // IE and Firefox (UNMht) seem to generate only absolute URLs. | 362 // IE and Firefox (UNMht) seem to generate only absolute URLs. |
| 363 KURL location = KURL(KURL(), mimeHeader.contentLocation()); | 363 KURL location = KURL(KURL(), mimeHeader.contentLocation()); |
| 364 return ArchiveResource::create(contentBuffer, location, AtomicString(mimeHea
der.contentType()), AtomicString(mimeHeader.charset()), String()); | 364 return ArchiveResource::create(contentBuffer, location, AtomicString(mimeHea
der.contentType()), AtomicString(mimeHeader.charset()), String()); |
| 365 } | 365 } |
| 366 | 366 |
| 367 size_t MHTMLParser::frameCount() const | 367 size_t MHTMLParser::frameCount() const |
| (...skipping 10 matching lines...) Expand all Loading... |
| 378 { | 378 { |
| 379 return m_resources.size(); | 379 return m_resources.size(); |
| 380 } | 380 } |
| 381 | 381 |
| 382 ArchiveResource* MHTMLParser::subResourceAt(size_t index) const | 382 ArchiveResource* MHTMLParser::subResourceAt(size_t index) const |
| 383 { | 383 { |
| 384 return m_resources[index].get(); | 384 return m_resources[index].get(); |
| 385 } | 385 } |
| 386 | 386 |
| 387 } | 387 } |
| OLD | NEW |