OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 14 matching lines...) Expand all Loading... |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include "config.h" | 31 #include "config.h" |
32 #include "platform/mhtml/MHTMLParser.h" | 32 #include "platform/mhtml/MHTMLParser.h" |
33 | 33 |
34 #include "platform/MIMETypeRegistry.h" | 34 #include "platform/MIMETypeRegistry.h" |
| 35 #include "platform/mhtml/ArchiveResource.h" |
35 #include "platform/mhtml/MHTMLArchive.h" | 36 #include "platform/mhtml/MHTMLArchive.h" |
36 #include "platform/network/ParsedContentType.h" | 37 #include "platform/network/ParsedContentType.h" |
37 #include "platform/text/QuotedPrintable.h" | 38 #include "platform/text/QuotedPrintable.h" |
38 #include "wtf/HashMap.h" | 39 #include "wtf/HashMap.h" |
39 #include "wtf/RefCounted.h" | 40 #include "wtf/RefCounted.h" |
40 #include "wtf/text/Base64.h" | 41 #include "wtf/text/Base64.h" |
41 #include "wtf/text/StringBuilder.h" | 42 #include "wtf/text/StringBuilder.h" |
42 #include "wtf/text/StringConcatenate.h" | 43 #include "wtf/text/StringConcatenate.h" |
43 #include "wtf/text/StringHash.h" | 44 #include "wtf/text/StringHash.h" |
44 #include "wtf/text/WTFString.h" | 45 #include "wtf/text/WTFString.h" |
45 | 46 |
46 namespace WebCore { | 47 namespace WebCore { |
47 | 48 |
48 // This class is a limited MIME parser used to parse the MIME headers of MHTML f
iles. | 49 // This class is a limited MIME parser used to parse the MIME headers of MHTML f
iles. |
49 class MIMEHeader : public RefCounted<MIMEHeader> { | 50 class MIMEHeader : public RefCountedWillBeGarbageCollectedFinalized<MIMEHeader>
{ |
50 public: | 51 public: |
| 52 static PassRefPtrWillBeRawPtr<MIMEHeader> create() |
| 53 { |
| 54 return adoptRefWillBeNoop(new MIMEHeader()); |
| 55 } |
| 56 |
51 enum Encoding { | 57 enum Encoding { |
52 QuotedPrintable, | 58 QuotedPrintable, |
53 Base64, | 59 Base64, |
54 EightBit, | 60 EightBit, |
55 SevenBit, | 61 SevenBit, |
56 Binary, | 62 Binary, |
57 Unknown | 63 Unknown |
58 }; | 64 }; |
59 | 65 |
60 static PassRefPtr<MIMEHeader> parseHeader(SharedBufferChunkReader* crLFLineR
eader); | 66 static PassRefPtrWillBeRawPtr<MIMEHeader> parseHeader(SharedBufferChunkReade
r* crLFLineReader); |
61 | 67 |
62 bool isMultipart() const { return m_contentType.startsWith("multipart/"); } | 68 bool isMultipart() const { return m_contentType.startsWith("multipart/"); } |
63 | 69 |
64 String contentType() const { return m_contentType; } | 70 String contentType() const { return m_contentType; } |
65 String charset() const { return m_charset; } | 71 String charset() const { return m_charset; } |
66 Encoding contentTransferEncoding() const { return m_contentTransferEncoding;
} | 72 Encoding contentTransferEncoding() const { return m_contentTransferEncoding;
} |
67 String contentLocation() const { return m_contentLocation; } | 73 String contentLocation() const { return m_contentLocation; } |
68 | 74 |
69 // Multi-part type and boundaries are only valid for multipart MIME headers. | 75 // Multi-part type and boundaries are only valid for multipart MIME headers. |
70 String multiPartType() const { return m_multipartType; } | 76 String multiPartType() const { return m_multipartType; } |
71 String endOfPartBoundary() const { return m_endOfPartBoundary; } | 77 String endOfPartBoundary() const { return m_endOfPartBoundary; } |
72 String endOfDocumentBoundary() const { return m_endOfDocumentBoundary; } | 78 String endOfDocumentBoundary() const { return m_endOfDocumentBoundary; } |
73 | 79 |
| 80 void trace(Visitor*) { } |
| 81 |
74 private: | 82 private: |
75 MIMEHeader(); | 83 MIMEHeader(); |
76 | 84 |
77 static Encoding parseContentTransferEncoding(const String&); | 85 static Encoding parseContentTransferEncoding(const String&); |
78 | 86 |
79 String m_contentType; | 87 String m_contentType; |
80 String m_charset; | 88 String m_charset; |
81 Encoding m_contentTransferEncoding; | 89 Encoding m_contentTransferEncoding; |
82 String m_contentLocation; | 90 String m_contentLocation; |
83 String m_multipartType; | 91 String m_multipartType; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
116 } | 124 } |
117 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); | 125 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); |
118 value.append(line.substring(semiColonIndex + 1)); | 126 value.append(line.substring(semiColonIndex + 1)); |
119 } | 127 } |
120 // Store the last property if there is one. | 128 // Store the last property if there is one. |
121 if (!key.isEmpty()) | 129 if (!key.isEmpty()) |
122 keyValuePairs.set(key, value.toString().stripWhiteSpace()); | 130 keyValuePairs.set(key, value.toString().stripWhiteSpace()); |
123 return keyValuePairs; | 131 return keyValuePairs; |
124 } | 132 } |
125 | 133 |
126 PassRefPtr<MIMEHeader> MIMEHeader::parseHeader(SharedBufferChunkReader* buffer) | 134 PassRefPtrWillBeRawPtr<MIMEHeader> MIMEHeader::parseHeader(SharedBufferChunkRead
er* buffer) |
127 { | 135 { |
128 RefPtr<MIMEHeader> mimeHeader = adoptRef(new MIMEHeader); | 136 RefPtrWillBeRawPtr<MIMEHeader> mimeHeader = MIMEHeader::create(); |
129 KeyValueMap keyValuePairs = retrieveKeyValuePairs(buffer); | 137 KeyValueMap keyValuePairs = retrieveKeyValuePairs(buffer); |
130 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t
ype"); | 138 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t
ype"); |
131 if (mimeParametersIterator != keyValuePairs.end()) { | 139 if (mimeParametersIterator != keyValuePairs.end()) { |
132 ParsedContentType parsedContentType(mimeParametersIterator->value); | 140 ParsedContentType parsedContentType(mimeParametersIterator->value); |
133 mimeHeader->m_contentType = parsedContentType.mimeType(); | 141 mimeHeader->m_contentType = parsedContentType.mimeType(); |
134 if (!mimeHeader->isMultipart()) { | 142 if (!mimeHeader->isMultipart()) { |
135 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); | 143 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); |
136 } else { | 144 } else { |
137 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); | 145 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); |
138 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); | 146 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 return true; | 195 return true; |
188 } | 196 } |
189 return false; | 197 return false; |
190 } | 198 } |
191 | 199 |
192 MHTMLParser::MHTMLParser(SharedBuffer* data) | 200 MHTMLParser::MHTMLParser(SharedBuffer* data) |
193 : m_lineReader(data, "\r\n") | 201 : m_lineReader(data, "\r\n") |
194 { | 202 { |
195 } | 203 } |
196 | 204 |
197 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive() | 205 PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLParser::parseArchive() |
198 { | 206 { |
199 RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); | 207 RefPtrWillBeRawPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReade
r); |
200 return parseArchiveWithHeader(header.get()); | 208 return parseArchiveWithHeader(header.get()); |
201 } | 209 } |
202 | 210 |
203 PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) | 211 PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHea
der* header) |
204 { | 212 { |
205 if (!header) { | 213 if (!header) { |
206 WTF_LOG_ERROR("Failed to parse MHTML part: no header."); | 214 WTF_LOG_ERROR("Failed to parse MHTML part: no header."); |
207 return nullptr; | 215 return nullptr; |
208 } | 216 } |
209 | 217 |
210 RefPtr<MHTMLArchive> archive = MHTMLArchive::create(); | 218 RefPtrWillBeRawPtr<MHTMLArchive> archive = MHTMLArchive::create(); |
211 if (!header->isMultipart()) { | 219 if (!header->isMultipart()) { |
212 // With IE a page with no resource is not multi-part. | 220 // With IE a page with no resource is not multi-part. |
213 bool endOfArchiveReached = false; | 221 bool endOfArchiveReached = false; |
214 RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), Stri
ng(), endOfArchiveReached); | 222 RefPtrWillBeRawPtr<ArchiveResource> resource = parseNextPart(*header, St
ring(), String(), endOfArchiveReached); |
215 if (!resource) | 223 if (!resource) |
216 return nullptr; | 224 return nullptr; |
217 archive->setMainResource(resource); | 225 archive->setMainResource(resource); |
218 return archive; | 226 return archive; |
219 } | 227 } |
220 | 228 |
221 // Skip the message content (it's a generic browser specific message). | 229 // Skip the message content (it's a generic browser specific message). |
222 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 230 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
223 | 231 |
224 bool endOfArchive = false; | 232 bool endOfArchive = false; |
225 while (!endOfArchive) { | 233 while (!endOfArchive) { |
226 RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReade
r); | 234 RefPtrWillBeRawPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(
&m_lineReader); |
227 if (!resourceHeader) { | 235 if (!resourceHeader) { |
228 WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header."); | 236 WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header."); |
229 return nullptr; | 237 return nullptr; |
230 } | 238 } |
231 if (resourceHeader->contentType() == "multipart/alternative") { | 239 if (resourceHeader->contentType() == "multipart/alternative") { |
232 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). | 240 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). |
233 RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resour
ceHeader.get()); | 241 RefPtrWillBeRawPtr<MHTMLArchive> subframeArchive = parseArchiveWithH
eader(resourceHeader.get()); |
234 if (!subframeArchive) { | 242 if (!subframeArchive) { |
235 WTF_LOG_ERROR("Failed to parse MHTML subframe."); | 243 WTF_LOG_ERROR("Failed to parse MHTML subframe."); |
236 return nullptr; | 244 return nullptr; |
237 } | 245 } |
238 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); | 246 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); |
239 ASSERT_UNUSED(endOfPartReached, endOfPartReached); | 247 ASSERT_UNUSED(endOfPartReached, endOfPartReached); |
240 // The top-frame is the first frame found, regardless of the nesting
level. | 248 // The top-frame is the first frame found, regardless of the nesting
level. |
241 if (subframeArchive->mainResource()) | 249 if (subframeArchive->mainResource()) |
242 addResourceToArchive(subframeArchive->mainResource(), archive.ge
t()); | 250 addResourceToArchive(subframeArchive->mainResource(), archive.ge
t()); |
243 archive->addSubframeArchive(subframeArchive); | 251 archive->addSubframeArchive(subframeArchive); |
244 continue; | 252 continue; |
245 } | 253 } |
246 | 254 |
247 RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header
->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); | 255 RefPtrWillBeRawPtr<ArchiveResource> resource = parseNextPart(*resourceHe
ader, header->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive
); |
248 if (!resource) { | 256 if (!resource) { |
249 WTF_LOG_ERROR("Failed to parse MHTML part."); | 257 WTF_LOG_ERROR("Failed to parse MHTML part."); |
250 return nullptr; | 258 return nullptr; |
251 } | 259 } |
252 addResourceToArchive(resource.get(), archive.get()); | 260 addResourceToArchive(resource.get(), archive.get()); |
253 } | 261 } |
254 | 262 |
255 return archive.release(); | 263 return archive.release(); |
256 } | 264 } |
257 | 265 |
258 void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
archive) | 266 void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
archive) |
259 { | 267 { |
260 const AtomicString& mimeType = resource->mimeType(); | 268 const AtomicString& mimeType = resource->mimeType(); |
261 if (!MIMETypeRegistry::isSupportedNonImageMIMEType(mimeType) || MIMETypeRegi
stry::isSupportedJavaScriptMIMEType(mimeType) || mimeType == "text/css") { | 269 if (!MIMETypeRegistry::isSupportedNonImageMIMEType(mimeType) || MIMETypeRegi
stry::isSupportedJavaScriptMIMEType(mimeType) || mimeType == "text/css") { |
262 m_resources.append(resource); | 270 m_resources.append(resource); |
263 return; | 271 return; |
264 } | 272 } |
265 | 273 |
266 // The first document suitable resource is the main frame. | 274 // The first document suitable resource is the main frame. |
267 if (!archive->mainResource()) { | 275 if (!archive->mainResource()) { |
268 archive->setMainResource(resource); | 276 archive->setMainResource(resource); |
269 m_frames.append(archive); | 277 m_frames.append(archive); |
270 return; | 278 return; |
271 } | 279 } |
272 | 280 |
273 RefPtr<MHTMLArchive> subframe = MHTMLArchive::create(); | 281 RefPtrWillBeRawPtr<MHTMLArchive> subframe = MHTMLArchive::create(); |
274 subframe->setMainResource(resource); | 282 subframe->setMainResource(resource); |
275 m_frames.append(subframe); | 283 m_frames.append(subframe); |
276 } | 284 } |
277 | 285 |
278 PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
der, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool&
endOfArchiveReached) | 286 PassRefPtrWillBeRawPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHea
der& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBou
ndary, bool& endOfArchiveReached) |
279 { | 287 { |
280 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); | 288 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); |
281 | 289 |
282 // If no content transfer encoding is specified, default to binary encoding. | 290 // If no content transfer encoding is specified, default to binary encoding. |
283 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); | 291 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); |
284 if (contentTransferEncoding == MIMEHeader::Unknown) | 292 if (contentTransferEncoding == MIMEHeader::Unknown) |
285 contentTransferEncoding = MIMEHeader::Binary; | 293 contentTransferEncoding = MIMEHeader::Binary; |
286 | 294 |
287 RefPtr<SharedBuffer> content = SharedBuffer::create(); | 295 RefPtr<SharedBuffer> content = SharedBuffer::create(); |
288 const bool checkBoundary = !endOfPartBoundary.isEmpty(); | 296 const bool checkBoundary = !endOfPartBoundary.isEmpty(); |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
378 { | 386 { |
379 return m_resources.size(); | 387 return m_resources.size(); |
380 } | 388 } |
381 | 389 |
382 ArchiveResource* MHTMLParser::subResourceAt(size_t index) const | 390 ArchiveResource* MHTMLParser::subResourceAt(size_t index) const |
383 { | 391 { |
384 return m_resources[index].get(); | 392 return m_resources[index].get(); |
385 } | 393 } |
386 | 394 |
387 } | 395 } |
OLD | NEW |