OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 24 matching lines...) Expand all Loading... |
35 #include "platform/text/QuotedPrintable.h" | 35 #include "platform/text/QuotedPrintable.h" |
36 #include "wtf/HashMap.h" | 36 #include "wtf/HashMap.h" |
37 #include "wtf/text/Base64.h" | 37 #include "wtf/text/Base64.h" |
38 #include "wtf/text/StringBuilder.h" | 38 #include "wtf/text/StringBuilder.h" |
39 #include "wtf/text/StringConcatenate.h" | 39 #include "wtf/text/StringConcatenate.h" |
40 #include "wtf/text/StringHash.h" | 40 #include "wtf/text/StringHash.h" |
41 #include "wtf/text/WTFString.h" | 41 #include "wtf/text/WTFString.h" |
42 | 42 |
43 namespace blink { | 43 namespace blink { |
44 | 44 |
45 // This class is a limited MIME parser used to parse the MIME headers of MHTML f
iles. | 45 // This class is a limited MIME parser used to parse the MIME headers of MHTML |
| 46 // files. |
46 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> { | 47 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> { |
47 public: | 48 public: |
48 static MIMEHeader* create() { return new MIMEHeader; } | 49 static MIMEHeader* create() { return new MIMEHeader; } |
49 | 50 |
50 enum Encoding { | 51 enum Encoding { |
51 QuotedPrintable, | 52 QuotedPrintable, |
52 Base64, | 53 Base64, |
53 EightBit, | 54 EightBit, |
54 SevenBit, | 55 SevenBit, |
55 Binary, | 56 Binary, |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
233 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 234 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
234 | 235 |
235 bool endOfArchive = false; | 236 bool endOfArchive = false; |
236 while (!endOfArchive) { | 237 while (!endOfArchive) { |
237 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); | 238 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); |
238 if (!resourceHeader) { | 239 if (!resourceHeader) { |
239 DVLOG(1) << "Failed to parse MHTML, invalid MIME header."; | 240 DVLOG(1) << "Failed to parse MHTML, invalid MIME header."; |
240 return false; | 241 return false; |
241 } | 242 } |
242 if (resourceHeader->contentType() == "multipart/alternative") { | 243 if (resourceHeader->contentType() == "multipart/alternative") { |
243 // Ignore IE nesting which makes little sense (IE seems to nest only some
of the frames). | 244 // Ignore IE nesting which makes little sense (IE seems to nest only some |
| 245 // of the frames). |
244 if (!parseArchiveWithHeader(resourceHeader, resources)) { | 246 if (!parseArchiveWithHeader(resourceHeader, resources)) { |
245 DVLOG(1) << "Failed to parse MHTML subframe."; | 247 DVLOG(1) << "Failed to parse MHTML subframe."; |
246 return false; | 248 return false; |
247 } | 249 } |
248 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 250 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
249 continue; | 251 continue; |
250 } | 252 } |
251 | 253 |
252 ArchiveResource* resource = | 254 ArchiveResource* resource = |
253 parseNextPart(*resourceHeader, header->endOfPartBoundary(), | 255 parseNextPart(*resourceHeader, header->endOfPartBoundary(), |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
306 } | 308 } |
307 } else { | 309 } else { |
308 String line; | 310 String line; |
309 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()) | 311 while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()) |
310 .isNull()) { | 312 .isNull()) { |
311 endOfArchiveReached = (line == endOfDocumentBoundary); | 313 endOfArchiveReached = (line == endOfDocumentBoundary); |
312 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) { | 314 if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) { |
313 endOfPartReached = true; | 315 endOfPartReached = true; |
314 break; | 316 break; |
315 } | 317 } |
316 // Note that we use line.utf8() and not line.ascii() as ascii turns specia
l characters (such as tab, line-feed...) into '?'. | 318 // Note that we use line.utf8() and not line.ascii() as ascii turns |
| 319 // special characters (such as tab, line-feed...) into '?'. |
317 content->append(line.utf8().data(), line.length()); | 320 content->append(line.utf8().data(), line.length()); |
318 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { | 321 if (contentTransferEncoding == MIMEHeader::QuotedPrintable) { |
319 // The line reader removes the \r\n, but we need them for the content in
this case as the QuotedPrintable decoder expects CR-LF terminated lines. | 322 // The line reader removes the \r\n, but we need them for the content in |
| 323 // this case as the QuotedPrintable decoder expects CR-LF terminated |
| 324 // lines. |
320 content->append("\r\n", 2u); | 325 content->append("\r\n", 2u); |
321 } | 326 } |
322 } | 327 } |
323 } | 328 } |
324 if (!endOfPartReached && checkBoundary) { | 329 if (!endOfPartReached && checkBoundary) { |
325 DVLOG(1) << "No boundary found for MHTML part."; | 330 DVLOG(1) << "No boundary found for MHTML part."; |
326 return nullptr; | 331 return nullptr; |
327 } | 332 } |
328 | 333 |
329 Vector<char> data; | 334 Vector<char> data; |
(...skipping 10 matching lines...) Expand all Loading... |
340 case MIMEHeader::EightBit: | 345 case MIMEHeader::EightBit: |
341 case MIMEHeader::SevenBit: | 346 case MIMEHeader::SevenBit: |
342 case MIMEHeader::Binary: | 347 case MIMEHeader::Binary: |
343 data.append(content->data(), content->size()); | 348 data.append(content->data(), content->size()); |
344 break; | 349 break; |
345 default: | 350 default: |
346 DVLOG(1) << "Invalid encoding for MHTML part."; | 351 DVLOG(1) << "Invalid encoding for MHTML part."; |
347 return nullptr; | 352 return nullptr; |
348 } | 353 } |
349 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); | 354 RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); |
350 // FIXME: the URL in the MIME header could be relative, we should resolve it i
f it is. | 355 // FIXME: the URL in the MIME header could be relative, we should resolve it |
351 // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2
557#section-5 | 356 // if it is. The specs mentions 5 ways to resolve a URL: |
| 357 // http://tools.ietf.org/html/rfc2557#section-5 |
352 // IE and Firefox (UNMht) seem to generate only absolute URLs. | 358 // IE and Firefox (UNMht) seem to generate only absolute URLs. |
353 KURL location = KURL(KURL(), mimeHeader.contentLocation()); | 359 KURL location = KURL(KURL(), mimeHeader.contentLocation()); |
354 return ArchiveResource::create(contentBuffer, location, | 360 return ArchiveResource::create(contentBuffer, location, |
355 mimeHeader.contentID(), | 361 mimeHeader.contentID(), |
356 AtomicString(mimeHeader.contentType()), | 362 AtomicString(mimeHeader.contentType()), |
357 AtomicString(mimeHeader.charset())); | 363 AtomicString(mimeHeader.charset())); |
358 } | 364 } |
359 | 365 |
360 // static | 366 // static |
361 KURL MHTMLParser::convertContentIDToURI(const String& contentID) { | 367 KURL MHTMLParser::convertContentIDToURI(const String& contentID) { |
362 // This function is based primarily on an example from rfc2557 in section | 368 // This function is based primarily on an example from rfc2557 in section |
363 // 9.5, but also based on more normative parts of specs like: | 369 // 9.5, but also based on more normative parts of specs like: |
364 // - rfc2557 - MHTML - section 8.3 - "Use of the Content-ID header and CID URL
s" | 370 // - rfc2557 - MHTML - section 8.3 - "Use of the Content-ID header and CID |
| 371 // URLs" |
365 // - rfc1738 - URL - section 4 (reserved scheme names; includes "cid") | 372 // - rfc1738 - URL - section 4 (reserved scheme names; includes "cid") |
366 // - rfc2387 - multipart/related - section 3.4 - "Syntax" (cid := msg-id) | 373 // - rfc2387 - multipart/related - section 3.4 - "Syntax" (cid := msg-id) |
367 // - rfc0822 - msg-id = "<" addr-spec ">"; addr-spec = local-part "@" domain | 374 // - rfc0822 - msg-id = "<" addr-spec ">"; addr-spec = local-part "@" domain |
368 | 375 |
369 if (contentID.length() <= 2) | 376 if (contentID.length() <= 2) |
370 return KURL(); | 377 return KURL(); |
371 | 378 |
372 if (!contentID.startsWith('<') || !contentID.endsWith('>')) | 379 if (!contentID.startsWith('<') || !contentID.endsWith('>')) |
373 return KURL(); | 380 return KURL(); |
374 | 381 |
375 StringBuilder uriBuilder; | 382 StringBuilder uriBuilder; |
376 uriBuilder.append("cid:"); | 383 uriBuilder.append("cid:"); |
377 uriBuilder.append(contentID, 1, contentID.length() - 2); | 384 uriBuilder.append(contentID, 1, contentID.length() - 2); |
378 return KURL(KURL(), uriBuilder.toString()); | 385 return KURL(KURL(), uriBuilder.toString()); |
379 } | 386 } |
380 | 387 |
381 } // namespace blink | 388 } // namespace blink |
OLD | NEW |