| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 #include "platform/text/QuotedPrintable.h" | 35 #include "platform/text/QuotedPrintable.h" |
| 36 #include "wtf/HashMap.h" | 36 #include "wtf/HashMap.h" |
| 37 #include "wtf/text/Base64.h" | 37 #include "wtf/text/Base64.h" |
| 38 #include "wtf/text/StringBuilder.h" | 38 #include "wtf/text/StringBuilder.h" |
| 39 #include "wtf/text/StringConcatenate.h" | 39 #include "wtf/text/StringConcatenate.h" |
| 40 #include "wtf/text/StringHash.h" | 40 #include "wtf/text/StringHash.h" |
| 41 #include "wtf/text/WTFString.h" | 41 #include "wtf/text/WTFString.h" |
| 42 | 42 |
| 43 namespace blink { | 43 namespace blink { |
| 44 | 44 |
| 45 // This class is a limited MIME parser used to parse the MIME headers of MHTML f
iles. | 45 // This class is a limited MIME parser used to parse the MIME headers of MHTML |
| 46 // files. |
| 46 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> { | 47 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> { |
| 47 public: | 48 public: |
| 48 static MIMEHeader* create() { return new MIMEHeader; } | 49 static MIMEHeader* create() { return new MIMEHeader; } |
| 49 | 50 |
| 50 enum Encoding { | 51 enum Encoding { |
| 51 QuotedPrintable, | 52 QuotedPrintable, |
| 52 Base64, | 53 Base64, |
| 53 EightBit, | 54 EightBit, |
| 54 SevenBit, | 55 SevenBit, |
| 55 Binary, | 56 Binary, |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 233 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 234 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
| 234 | 235 |
| 235 bool endOfArchive = false; | 236 bool endOfArchive = false; |
| 236 while (!endOfArchive) { | 237 while (!endOfArchive) { |
| 237 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); | 238 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); |
| 238 if (!resourceHeader) { | 239 if (!resourceHeader) { |
| 239 DVLOG(1) << "Failed to parse MHTML, invalid MIME header."; | 240 DVLOG(1) << "Failed to parse MHTML, invalid MIME header."; |
| 240 return false; | 241 return false; |
| 241 } | 242 } |
| 242 if (resourceHeader->contentType() == "multipart/alternative") { | 243 if (resourceHeader->contentType() == "multipart/alternative") { |
| 243 // Ignore IE nesting which makes little sense (IE seems to nest only some
of the frames). | 244 // Ignore IE nesting which makes little sense (IE seems to nest only some |
| 245 // of the frames). |
| 244 if (!parseArchiveWithHeader(resourceHeader, resources)) { | 246 if (!parseArchiveWithHeader(resourceHeader, resources)) { |
| 245 DVLOG(1) << "Failed to parse MHTML subframe."; | 247 DVLOG(1) << "Failed to parse MHTML subframe."; |
| 246 return false; | 248 return false; |
| 247 } | 249 } |
| 248 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 250 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
| 249 continue; | 251 continue; |
| 250 } | 252 } |
| 251 | 253 |
| 252 ArchiveResource* resource = | 254 ArchiveResource* resource = |
| 253 parseNextPart(*resourceHeader, header->endOfPartBoundary(), | 255 parseNextPart(*resourceHeader, header->endOfPartBoundary(), |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 306 } | 308 } |
| 307 } else { | 309 } else { |
| 308 String line; | 310 String line; |
| 309 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()) | 311 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()) |
| 310 .isNull()) { | 312 .isNull()) { |
| 311 endOfArchiveReached = (line == endOfDocumentBoundary); | 313 endOfArchiveReached = (line == endOfDocumentBoundary); |
| 312 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) { | 314 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) { |
| 313 endOfPartReached = true; | 315 endOfPartReached = true; |
| 314 break; | 316 break; |
| 315 } | 317 } |
| 316 // Note that we use line.utf8() and not line.ascii() as ascii turns specia
l characters (such as tab, line-feed...) into '?'. | 318 // Note that we use line.utf8() and not line.ascii() as ascii turns |
| 319 // special characters (such as tab, line-feed...) into '?'. |
| 317 content->append(line.utf8().data(), line.length()); | 320 content->append(line.utf8().data(), line.length()); |
| 318 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { | 321 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { |
| 319 // The line reader removes the \r\n, but we need them for the content in
this case as the QuotedPrintable decoder expects CR-LF terminated lines. | 322 // The line reader removes the \r\n, but we need them for the content in |
| 323 // this case as the QuotedPrintable decoder expects CR-LF terminated |
| 324 // lines. |
| 320 content->append("\r\n", 2u); | 325 content->append("\r\n", 2u); |
| 321 } | 326 } |
| 322 } | 327 } |
| 323 } | 328 } |
| 324 if (!endOfPartReached && checkBoundary) { | 329 if (!endOfPartReached && checkBoundary) { |
| 325 DVLOG(1) << "No boundary found for MHTML part."; | 330 DVLOG(1) << "No boundary found for MHTML part."; |
| 326 return nullptr; | 331 return nullptr; |
| 327 } | 332 } |
| 328 | 333 |
| 329 Vector<char> data; | 334 Vector<char> data; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 340 case MIMEHeader::EightBit: | 345 case MIMEHeader::EightBit: |
| 341 case MIMEHeader::SevenBit: | 346 case MIMEHeader::SevenBit: |
| 342 case MIMEHeader::Binary: | 347 case MIMEHeader::Binary: |
| 343 data.append(content->data(), content->size()); | 348 data.append(content->data(), content->size()); |
| 344 break; | 349 break; |
| 345 default: | 350 default: |
| 346 DVLOG(1) << "Invalid encoding for MHTML part."; | 351 DVLOG(1) << "Invalid encoding for MHTML part."; |
| 347 return nullptr; | 352 return nullptr; |
| 348 } | 353 } |
| 349 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); | 354 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); |
| 350 // FIXME: the URL in the MIME header could be relative, we should resolve it i
f it is. | 355 // FIXME: the URL in the MIME header could be relative, we should resolve it |
| 351 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2
557#section-5 | 356 // if it is. The specs mentions 5 ways to resolve a URL: |
| 357 // http://tools.ietf.org/html/rfc2557#section-5 |
| 352 // IE and Firefox (UNMht) seem to generate only absolute URLs. | 358 // IE and Firefox (UNMht) seem to generate only absolute URLs. |
| 353 KURL location = KURL(KURL(), mimeHeader.contentLocation()); | 359 KURL location = KURL(KURL(), mimeHeader.contentLocation()); |
| 354 return ArchiveResource::create(contentBuffer, location, | 360 return ArchiveResource::create(contentBuffer, location, |
| 355 mimeHeader.contentID(), | 361 mimeHeader.contentID(), |
| 356 AtomicString(mimeHeader.contentType()), | 362 AtomicString(mimeHeader.contentType()), |
| 357 AtomicString(mimeHeader.charset())); | 363 AtomicString(mimeHeader.charset())); |
| 358 } | 364 } |
| 359 | 365 |
| 360 // static | 366 // static |
| 361 KURL MHTMLParser::convertContentIDToURI(const String& contentID) { | 367 KURL MHTMLParser::convertContentIDToURI(const String& contentID) { |
| 362 // This function is based primarily on an example from rfc2557 in section | 368 // This function is based primarily on an example from rfc2557 in section |
| 363 // 9.5, but also based on more normative parts of specs like: | 369 // 9.5, but also based on more normative parts of specs like: |
| 364 // - rfc2557 - MHTML - section 8.3 - "Use of the Content-ID header and CID URL
s" | 370 // - rfc2557 - MHTML - section 8.3 - "Use of the Content-ID header and CID |
| 371 // URLs" |
| 365 // - rfc1738 - URL - section 4 (reserved scheme names; includes "cid") | 372 // - rfc1738 - URL - section 4 (reserved scheme names; includes "cid") |
| 366 // - rfc2387 - multipart/related - section 3.4 - "Syntax" (cid := msg-id) | 373 // - rfc2387 - multipart/related - section 3.4 - "Syntax" (cid := msg-id) |
| 367 // - rfc0822 - msg-id = "<" addr-spec ">"; addr-spec = local-part "@" domain | 374 // - rfc0822 - msg-id = "<" addr-spec ">"; addr-spec = local-part "@" domain |
| 368 | 375 |
| 369 if (contentID.length() <= 2) | 376 if (contentID.length() <= 2) |
| 370 return KURL(); | 377 return KURL(); |
| 371 | 378 |
| 372 if (!contentID.startsWith('<') || !contentID.endsWith('>')) | 379 if (!contentID.startsWith('<') || !contentID.endsWith('>')) |
| 373 return KURL(); | 380 return KURL(); |
| 374 | 381 |
| 375 StringBuilder uriBuilder; | 382 StringBuilder uriBuilder; |
| 376 uriBuilder.append("cid:"); | 383 uriBuilder.append("cid:"); |
| 377 uriBuilder.append(contentID, 1, contentID.length() - 2); | 384 uriBuilder.append(contentID, 1, contentID.length() - 2); |
| 378 return KURL(KURL(), uriBuilder.toString()); | 385 return KURL(KURL(), uriBuilder.toString()); |
| 379 } | 386 } |
| 380 | 387 |
| 381 } // namespace blink | 388 } // namespace blink |
| OLD | NEW |