| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 96 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer) | 96 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer) |
| 97 { | 97 { |
| 98 KeyValueMap keyValuePairs; | 98 KeyValueMap keyValuePairs; |
| 99 String line; | 99 String line; |
| 100 String key; | 100 String key; |
| 101 StringBuilder value; | 101 StringBuilder value; |
| 102 while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull())
{ | 102 while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull())
{ |
| 103 if (line.isEmpty()) | 103 if (line.isEmpty()) |
| 104 break; // Empty line means end of key/value section. | 104 break; // Empty line means end of key/value section. |
| 105 if (line[0] == '\t') { | 105 if (line[0] == '\t') { |
| 106 ASSERT(!key.isEmpty()); | |
| 107 value.append(line.substring(1)); | 106 value.append(line.substring(1)); |
| 108 continue; | 107 continue; |
| 109 } | 108 } |
| 110 // New key/value, store the previous one if any. | 109 // New key/value, store the previous one if any. |
| 111 if (!key.isEmpty()) { | 110 if (!key.isEmpty()) { |
| 112 if (keyValuePairs.find(key) != keyValuePairs.end()) | 111 if (keyValuePairs.find(key) != keyValuePairs.end()) |
| 113 DLOG(ERROR) << "Key duplicate found in MIME header. Key is '" <<
key << "', previous value replaced."; | 112 DVLOG(1) << "Key duplicate found in MIME header. Key is '" << ke
y << "', previous value replaced."; |
| 114 keyValuePairs.add(key, value.toString().stripWhiteSpace()); | 113 keyValuePairs.add(key, value.toString().stripWhiteSpace()); |
| 115 key = String(); | 114 key = String(); |
| 116 value.clear(); | 115 value.clear(); |
| 117 } | 116 } |
| 118 size_t semiColonIndex = line.find(':'); | 117 size_t semiColonIndex = line.find(':'); |
| 119 if (semiColonIndex == kNotFound) { | 118 if (semiColonIndex == kNotFound) { |
| 120 // This is not a key value pair, ignore. | 119 // This is not a key value pair, ignore. |
| 121 continue; | 120 continue; |
| 122 } | 121 } |
| 123 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); | 122 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 136 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t
ype"); | 135 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t
ype"); |
| 137 if (mimeParametersIterator != keyValuePairs.end()) { | 136 if (mimeParametersIterator != keyValuePairs.end()) { |
| 138 ParsedContentType parsedContentType(mimeParametersIterator->value); | 137 ParsedContentType parsedContentType(mimeParametersIterator->value); |
| 139 mimeHeader->m_contentType = parsedContentType.mimeType(); | 138 mimeHeader->m_contentType = parsedContentType.mimeType(); |
| 140 if (!mimeHeader->isMultipart()) { | 139 if (!mimeHeader->isMultipart()) { |
| 141 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); | 140 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); |
| 142 } else { | 141 } else { |
| 143 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); | 142 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); |
| 144 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); | 143 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); |
| 145 if (mimeHeader->m_endOfPartBoundary.isNull()) { | 144 if (mimeHeader->m_endOfPartBoundary.isNull()) { |
| 146 DLOG(ERROR) << "No boundary found in multipart MIME header."; | 145 DVLOG(1) << "No boundary found in multipart MIME header."; |
| 147 return nullptr; | 146 return nullptr; |
| 148 } | 147 } |
| 149 mimeHeader->m_endOfPartBoundary.insert("--", 0); | 148 mimeHeader->m_endOfPartBoundary.insert("--", 0); |
| 150 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar
y; | 149 mimeHeader->m_endOfDocumentBoundary = mimeHeader->m_endOfPartBoundar
y; |
| 151 mimeHeader->m_endOfDocumentBoundary.append("--"); | 150 mimeHeader->m_endOfDocumentBoundary.append("--"); |
| 152 } | 151 } |
| 153 } | 152 } |
| 154 | 153 |
| 155 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); | 154 mimeParametersIterator = keyValuePairs.find("content-transfer-encoding"); |
| 156 if (mimeParametersIterator != keyValuePairs.end()) | 155 if (mimeParametersIterator != keyValuePairs.end()) |
| (...skipping 17 matching lines...) Expand all Loading... |
| 174 if (encoding == "base64") | 173 if (encoding == "base64") |
| 175 return Base64; | 174 return Base64; |
| 176 if (encoding == "quoted-printable") | 175 if (encoding == "quoted-printable") |
| 177 return QuotedPrintable; | 176 return QuotedPrintable; |
| 178 if (encoding == "8bit") | 177 if (encoding == "8bit") |
| 179 return EightBit; | 178 return EightBit; |
| 180 if (encoding == "7bit") | 179 if (encoding == "7bit") |
| 181 return SevenBit; | 180 return SevenBit; |
| 182 if (encoding == "binary") | 181 if (encoding == "binary") |
| 183 return Binary; | 182 return Binary; |
| 184 DLOG(ERROR) << "Unknown encoding '" << text << "' found in MIME header."; | 183 DVLOG(1) << "Unknown encoding '" << text << "' found in MIME header."; |
| 185 return Unknown; | 184 return Unknown; |
| 186 } | 185 } |
| 187 | 186 |
| 188 MIMEHeader::MIMEHeader() | 187 MIMEHeader::MIMEHeader() |
| 189 : m_contentTransferEncoding(Unknown) | 188 : m_contentTransferEncoding(Unknown) |
| 190 { | 189 { |
| 191 } | 190 } |
| 192 | 191 |
| 193 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, con
st String& boundary) | 192 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader, con
st String& boundary) |
| 194 { | 193 { |
| (...skipping 15 matching lines...) Expand all Loading... |
| 210 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader); | 209 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader); |
| 211 HeapVector<Member<ArchiveResource>> resources; | 210 HeapVector<Member<ArchiveResource>> resources; |
| 212 if (!parseArchiveWithHeader(header, resources)) | 211 if (!parseArchiveWithHeader(header, resources)) |
| 213 resources.clear(); | 212 resources.clear(); |
| 214 return resources; | 213 return resources; |
| 215 } | 214 } |
| 216 | 215 |
| 217 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<A
rchiveResource>>& resources) | 216 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<A
rchiveResource>>& resources) |
| 218 { | 217 { |
| 219 if (!header) { | 218 if (!header) { |
| 220 DLOG(ERROR) << "Failed to parse MHTML part: no header."; | 219 DVLOG(1) << "Failed to parse MHTML part: no header."; |
| 221 return false; | 220 return false; |
| 222 } | 221 } |
| 223 | 222 |
| 224 if (!header->isMultipart()) { | 223 if (!header->isMultipart()) { |
| 225 // With IE a page with no resource is not multi-part. | 224 // With IE a page with no resource is not multi-part. |
| 226 bool endOfArchiveReached = false; | 225 bool endOfArchiveReached = false; |
| 227 ArchiveResource* resource = parseNextPart(*header, String(), String(), e
ndOfArchiveReached); | 226 ArchiveResource* resource = parseNextPart(*header, String(), String(), e
ndOfArchiveReached); |
| 228 if (!resource) | 227 if (!resource) |
| 229 return false; | 228 return false; |
| 230 resources.append(resource); | 229 resources.append(resource); |
| 231 return true; | 230 return true; |
| 232 } | 231 } |
| 233 | 232 |
| 234 // Skip the message content (it's a generic browser specific message). | 233 // Skip the message content (it's a generic browser specific message). |
| 235 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 234 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
| 236 | 235 |
| 237 bool endOfArchive = false; | 236 bool endOfArchive = false; |
| 238 while (!endOfArchive) { | 237 while (!endOfArchive) { |
| 239 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); | 238 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); |
| 240 if (!resourceHeader) { | 239 if (!resourceHeader) { |
| 241 DLOG(ERROR) << "Failed to parse MHTML, invalid MIME header."; | 240 DVLOG(1) << "Failed to parse MHTML, invalid MIME header."; |
| 242 return false; | 241 return false; |
| 243 } | 242 } |
| 244 if (resourceHeader->contentType() == "multipart/alternative") { | 243 if (resourceHeader->contentType() == "multipart/alternative") { |
| 245 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). | 244 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). |
| 246 if (!parseArchiveWithHeader(resourceHeader, resources)) { | 245 if (!parseArchiveWithHeader(resourceHeader, resources)) { |
| 247 DLOG(ERROR) << "Failed to parse MHTML subframe."; | 246 DVLOG(1) << "Failed to parse MHTML subframe."; |
| 248 return false; | 247 return false; |
| 249 } | 248 } |
| 250 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); | 249 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary(
)); |
| 251 ASSERT_UNUSED(endOfPartReached, endOfPartReached); | |
| 252 continue; | 250 continue; |
| 253 } | 251 } |
| 254 | 252 |
| 255 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOf
PartBoundary(), header->endOfDocumentBoundary(), endOfArchive); | 253 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOf
PartBoundary(), header->endOfDocumentBoundary(), endOfArchive); |
| 256 if (!resource) { | 254 if (!resource) { |
| 257 DLOG(ERROR) << "Failed to parse MHTML part."; | 255 DVLOG(1) << "Failed to parse MHTML part."; |
| 258 return false; | 256 return false; |
| 259 } | 257 } |
| 260 resources.append(resource); | 258 resources.append(resource); |
| 261 } | 259 } |
| 262 return true; | 260 return true; |
| 263 } | 261 } |
| 264 | 262 |
| 265 | 263 |
| 266 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const
String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchi
veReached) | 264 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const
String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchi
veReached) |
| 267 { | 265 { |
| 268 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); | 266 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); |
| 269 | 267 |
| 270 // If no content transfer encoding is specified, default to binary encoding. | 268 // If no content transfer encoding is specified, default to binary encoding. |
| 271 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); | 269 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); |
| 272 if (contentTransferEncoding == MIMEHeader::Unknown) | 270 if (contentTransferEncoding == MIMEHeader::Unknown) |
| 273 contentTransferEncoding = MIMEHeader::Binary; | 271 contentTransferEncoding = MIMEHeader::Binary; |
| 274 | 272 |
| 275 RefPtr<SharedBuffer> content = SharedBuffer::create(); | 273 RefPtr<SharedBuffer> content = SharedBuffer::create(); |
| 276 const bool checkBoundary = !endOfPartBoundary.isEmpty(); | 274 const bool checkBoundary = !endOfPartBoundary.isEmpty(); |
| 277 bool endOfPartReached = false; | 275 bool endOfPartReached = false; |
| 278 if (contentTransferEncoding == MIMEHeader::Binary) { | 276 if (contentTransferEncoding == MIMEHeader::Binary) { |
| 279 if (!checkBoundary) { | 277 if (!checkBoundary) { |
| 280 DLOG(ERROR) << "Binary contents requires end of part"; | 278 DVLOG(1) << "Binary contents requires end of part"; |
| 281 return nullptr; | 279 return nullptr; |
| 282 } | 280 } |
| 283 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); | 281 m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); |
| 284 Vector<char> part; | 282 Vector<char> part; |
| 285 if (!m_lineReader.nextChunk(part)) { | 283 if (!m_lineReader.nextChunk(part)) { |
| 286 DLOG(ERROR) << "Binary contents requires end of part"; | 284 DVLOG(1) << "Binary contents requires end of part"; |
| 287 return nullptr; | 285 return nullptr; |
| 288 } | 286 } |
| 289 content->append(part); | 287 content->append(part); |
| 290 m_lineReader.setSeparator("\r\n"); | 288 m_lineReader.setSeparator("\r\n"); |
| 291 Vector<char> nextChars; | 289 Vector<char> nextChars; |
| 292 if (m_lineReader.peek(nextChars, 2) != 2) { | 290 if (m_lineReader.peek(nextChars, 2) != 2) { |
| 293 DLOG(ERROR) << "Invalid seperator."; | 291 DVLOG(1) << "Invalid seperator."; |
| 294 return nullptr; | 292 return nullptr; |
| 295 } | 293 } |
| 296 endOfPartReached = true; | 294 endOfPartReached = true; |
| 297 ASSERT(nextChars.size() == 2); | 295 ASSERT(nextChars.size() == 2); |
| 298 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); | 296 endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); |
| 299 if (!endOfArchiveReached) { | 297 if (!endOfArchiveReached) { |
| 300 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()
; | 298 String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()
; |
| 301 if (!line.isEmpty()) { | 299 if (!line.isEmpty()) { |
| 302 DLOG(ERROR) << "No CRLF at end of binary section."; | 300 DVLOG(1) << "No CRLF at end of binary section."; |
| 303 return nullptr; | 301 return nullptr; |
| 304 } | 302 } |
| 305 } | 303 } |
| 306 } else { | 304 } else { |
| 307 String line; | 305 String line; |
| 308 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).
isNull()) { | 306 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).
isNull()) { |
| 309 endOfArchiveReached = (line == endOfDocumentBoundary); | 307 endOfArchiveReached = (line == endOfDocumentBoundary); |
| 310 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach
ed)) { | 308 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReach
ed)) { |
| 311 endOfPartReached = true; | 309 endOfPartReached = true; |
| 312 break; | 310 break; |
| 313 } | 311 } |
| 314 // Note that we use line.utf8() and not line.ascii() as ascii turns
special characters (such as tab, line-feed...) into '?'. | 312 // Note that we use line.utf8() and not line.ascii() as ascii turns
special characters (such as tab, line-feed...) into '?'. |
| 315 content->append(line.utf8().data(), line.length()); | 313 content->append(line.utf8().data(), line.length()); |
| 316 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { | 314 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { |
| 317 // The line reader removes the \r\n, but we need them for the co
ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines
. | 315 // The line reader removes the \r\n, but we need them for the co
ntent in this case as the QuotedPrintable decoder expects CR-LF terminated lines
. |
| 318 content->append("\r\n", 2u); | 316 content->append("\r\n", 2u); |
| 319 } | 317 } |
| 320 } | 318 } |
| 321 } | 319 } |
| 322 if (!endOfPartReached && checkBoundary) { | 320 if (!endOfPartReached && checkBoundary) { |
| 323 DLOG(ERROR) << "No bounday found for MHTML part."; | 321 DVLOG(1) << "No boundary found for MHTML part."; |
| 324 return nullptr; | 322 return nullptr; |
| 325 } | 323 } |
| 326 | 324 |
| 327 Vector<char> data; | 325 Vector<char> data; |
| 328 switch (contentTransferEncoding) { | 326 switch (contentTransferEncoding) { |
| 329 case MIMEHeader::Base64: | 327 case MIMEHeader::Base64: |
| 330 if (!base64Decode(content->data(), content->size(), data)) { | 328 if (!base64Decode(content->data(), content->size(), data)) { |
| 331 DLOG(ERROR) << "Invalid base64 content for MHTML part."; | 329 DVLOG(1) << "Invalid base64 content for MHTML part."; |
| 332 return nullptr; | 330 return nullptr; |
| 333 } | 331 } |
| 334 break; | 332 break; |
| 335 case MIMEHeader::QuotedPrintable: | 333 case MIMEHeader::QuotedPrintable: |
| 336 quotedPrintableDecode(content->data(), content->size(), data); | 334 quotedPrintableDecode(content->data(), content->size(), data); |
| 337 break; | 335 break; |
| 338 case MIMEHeader::EightBit: | 336 case MIMEHeader::EightBit: |
| 339 case MIMEHeader::SevenBit: | 337 case MIMEHeader::SevenBit: |
| 340 case MIMEHeader::Binary: | 338 case MIMEHeader::Binary: |
| 341 data.append(content->data(), content->size()); | 339 data.append(content->data(), content->size()); |
| 342 break; | 340 break; |
| 343 default: | 341 default: |
| 344 DLOG(ERROR) << "Invalid encoding for MHTML part."; | 342 DVLOG(1) << "Invalid encoding for MHTML part."; |
| 345 return nullptr; | 343 return nullptr; |
| 346 } | 344 } |
| 347 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); | 345 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); |
| 348 // FIXME: the URL in the MIME header could be relative, we should resolve it
if it is. | 346 // FIXME: the URL in the MIME header could be relative, we should resolve it
if it is. |
| 349 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf
c2557#section-5 | 347 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rf
c2557#section-5 |
| 350 // IE and Firefox (UNMht) seem to generate only absolute URLs. | 348 // IE and Firefox (UNMht) seem to generate only absolute URLs. |
| 351 KURL location = KURL(KURL(), mimeHeader.contentLocation()); | 349 KURL location = KURL(KURL(), mimeHeader.contentLocation()); |
| 352 return ArchiveResource::create( | 350 return ArchiveResource::create( |
| 353 contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader
.contentType()), AtomicString(mimeHeader.charset())); | 351 contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader
.contentType()), AtomicString(mimeHeader.charset())); |
| 354 } | 352 } |
| (...skipping 14 matching lines...) Expand all Loading... |
| 369 if (!contentID.startsWith('<') || !contentID.endsWith('>')) | 367 if (!contentID.startsWith('<') || !contentID.endsWith('>')) |
| 370 return KURL(); | 368 return KURL(); |
| 371 | 369 |
| 372 StringBuilder uriBuilder; | 370 StringBuilder uriBuilder; |
| 373 uriBuilder.append("cid:"); | 371 uriBuilder.append("cid:"); |
| 374 uriBuilder.append(contentID, 1, contentID.length() - 2); | 372 uriBuilder.append(contentID, 1, contentID.length() - 2); |
| 375 return KURL(KURL(), uriBuilder.toString()); | 373 return KURL(KURL(), uriBuilder.toString()); |
| 376 } | 374 } |
| 377 | 375 |
| 378 } // namespace blink | 376 } // namespace blink |
| OLD | NEW |