OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 28 matching lines...) Expand all Loading... |
39 #include "wtf/text/StringBuilder.h" | 39 #include "wtf/text/StringBuilder.h" |
40 #include "wtf/text/StringConcatenate.h" | 40 #include "wtf/text/StringConcatenate.h" |
41 #include "wtf/text/StringHash.h" | 41 #include "wtf/text/StringHash.h" |
42 #include "wtf/text/WTFString.h" | 42 #include "wtf/text/WTFString.h" |
43 | 43 |
44 namespace blink { | 44 namespace blink { |
45 | 45 |
46 // This class is a limited MIME parser used to parse the MIME headers of MHTML f
iles. | 46 // This class is a limited MIME parser used to parse the MIME headers of MHTML f
iles. |
47 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> { | 47 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> { |
48 public: | 48 public: |
49 static RawPtr<MIMEHeader> create() | 49 static MIMEHeader* create() |
50 { | 50 { |
51 return new MIMEHeader(); | 51 return new MIMEHeader; |
52 } | 52 } |
53 | 53 |
54 enum Encoding { | 54 enum Encoding { |
55 QuotedPrintable, | 55 QuotedPrintable, |
56 Base64, | 56 Base64, |
57 EightBit, | 57 EightBit, |
58 SevenBit, | 58 SevenBit, |
59 Binary, | 59 Binary, |
60 Unknown | 60 Unknown |
61 }; | 61 }; |
62 | 62 |
63 static RawPtr<MIMEHeader> parseHeader(SharedBufferChunkReader* crLFLineReade
r); | 63 static MIMEHeader* parseHeader(SharedBufferChunkReader* crLFLineReader); |
64 | 64 |
65 bool isMultipart() const { return m_contentType.startsWith("multipart/", Tex
tCaseInsensitive); } | 65 bool isMultipart() const { return m_contentType.startsWith("multipart/", Tex
tCaseInsensitive); } |
66 | 66 |
67 String contentType() const { return m_contentType; } | 67 String contentType() const { return m_contentType; } |
68 String charset() const { return m_charset; } | 68 String charset() const { return m_charset; } |
69 Encoding contentTransferEncoding() const { return m_contentTransferEncoding;
} | 69 Encoding contentTransferEncoding() const { return m_contentTransferEncoding;
} |
70 String contentLocation() const { return m_contentLocation; } | 70 String contentLocation() const { return m_contentLocation; } |
71 String contentID() const { return m_contentID; } | 71 String contentID() const { return m_contentID; } |
72 | 72 |
73 // Multi-part type and boundaries are only valid for multipart MIME headers. | 73 // Multi-part type and boundaries are only valid for multipart MIME headers. |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
123 } | 123 } |
124 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); | 124 key = line.substring(0, semiColonIndex).lower().stripWhiteSpace(); |
125 value.append(line.substring(semiColonIndex + 1)); | 125 value.append(line.substring(semiColonIndex + 1)); |
126 } | 126 } |
127 // Store the last property if there is one. | 127 // Store the last property if there is one. |
128 if (!key.isEmpty()) | 128 if (!key.isEmpty()) |
129 keyValuePairs.set(key, value.toString().stripWhiteSpace()); | 129 keyValuePairs.set(key, value.toString().stripWhiteSpace()); |
130 return keyValuePairs; | 130 return keyValuePairs; |
131 } | 131 } |
132 | 132 |
133 RawPtr<MIMEHeader> MIMEHeader::parseHeader(SharedBufferChunkReader* buffer) | 133 MIMEHeader* MIMEHeader::parseHeader(SharedBufferChunkReader* buffer) |
134 { | 134 { |
135 RawPtr<MIMEHeader> mimeHeader = MIMEHeader::create(); | 135 MIMEHeader* mimeHeader = MIMEHeader::create(); |
136 KeyValueMap keyValuePairs = retrieveKeyValuePairs(buffer); | 136 KeyValueMap keyValuePairs = retrieveKeyValuePairs(buffer); |
137 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t
ype"); | 137 KeyValueMap::iterator mimeParametersIterator = keyValuePairs.find("content-t
ype"); |
138 if (mimeParametersIterator != keyValuePairs.end()) { | 138 if (mimeParametersIterator != keyValuePairs.end()) { |
139 ParsedContentType parsedContentType(mimeParametersIterator->value); | 139 ParsedContentType parsedContentType(mimeParametersIterator->value); |
140 mimeHeader->m_contentType = parsedContentType.mimeType(); | 140 mimeHeader->m_contentType = parsedContentType.mimeType(); |
141 if (!mimeHeader->isMultipart()) { | 141 if (!mimeHeader->isMultipart()) { |
142 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); | 142 mimeHeader->m_charset = parsedContentType.charset().stripWhiteSpace(
); |
143 } else { | 143 } else { |
144 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); | 144 mimeHeader->m_multipartType = parsedContentType.parameterValueForNam
e("type"); |
145 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); | 145 mimeHeader->m_endOfPartBoundary = parsedContentType.parameterValueFo
rName("boundary"); |
(...skipping 13 matching lines...) Expand all Loading... |
159 | 159 |
160 mimeParametersIterator = keyValuePairs.find("content-location"); | 160 mimeParametersIterator = keyValuePairs.find("content-location"); |
161 if (mimeParametersIterator != keyValuePairs.end()) | 161 if (mimeParametersIterator != keyValuePairs.end()) |
162 mimeHeader->m_contentLocation = mimeParametersIterator->value; | 162 mimeHeader->m_contentLocation = mimeParametersIterator->value; |
163 | 163 |
164 // See rfc2557 - section 8.3 - Use of the Content-ID header and CID URLs. | 164 // See rfc2557 - section 8.3 - Use of the Content-ID header and CID URLs. |
165 mimeParametersIterator = keyValuePairs.find("content-id"); | 165 mimeParametersIterator = keyValuePairs.find("content-id"); |
166 if (mimeParametersIterator != keyValuePairs.end()) | 166 if (mimeParametersIterator != keyValuePairs.end()) |
167 mimeHeader->m_contentID = mimeParametersIterator->value; | 167 mimeHeader->m_contentID = mimeParametersIterator->value; |
168 | 168 |
169 return mimeHeader.release(); | 169 return mimeHeader; |
170 } | 170 } |
171 | 171 |
172 MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(const String& text
) | 172 MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(const String& text
) |
173 { | 173 { |
174 String encoding = text.stripWhiteSpace().lower(); | 174 String encoding = text.stripWhiteSpace().lower(); |
175 if (encoding == "base64") | 175 if (encoding == "base64") |
176 return Base64; | 176 return Base64; |
177 if (encoding == "quoted-printable") | 177 if (encoding == "quoted-printable") |
178 return QuotedPrintable; | 178 return QuotedPrintable; |
179 if (encoding == "8bit") | 179 if (encoding == "8bit") |
(...skipping 21 matching lines...) Expand all Loading... |
201 return false; | 201 return false; |
202 } | 202 } |
203 | 203 |
// Constructs a parser that reads the archive bytes in |data| through
// m_lineReader. The reader is given a CR LF string as its second argument
// (presumably the line separator it splits on -- confirm against
// SharedBufferChunkReader).
204 MHTMLParser::MHTMLParser(SharedBuffer* data) | 204 MHTMLParser::MHTMLParser(SharedBuffer* data) |
205 : m_lineReader(data, "\r\n") | 205 : m_lineReader(data, "\r\n") |
206 { | 206 { |
207 } | 207 } |
208 | 208 |
// Entry point: parses the archive held by m_lineReader and returns the
// resources that were extracted from it.
//
// The top-level MIME header is parsed first and then handed to
// parseArchiveWithHeader(), which fills |resources|. If that call reports
// failure, anything collected so far is discarded, so the caller gets an
// empty vector rather than a partially parsed archive.
209 HeapVector<Member<ArchiveResource>> MHTMLParser::parseArchive() | 209 HeapVector<Member<ArchiveResource>> MHTMLParser::parseArchive() |
210 { | 210 { |
211 RawPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); | 211 MIMEHeader* header = MIMEHeader::parseHeader(&m_lineReader); |
212 HeapVector<Member<ArchiveResource>> resources; | 212 HeapVector<Member<ArchiveResource>> resources; |
213 if (!parseArchiveWithHeader(header.get(), resources)) | 213 if (!parseArchiveWithHeader(header, resources)) |
214 resources.clear(); | 214 resources.clear(); |
215 return resources; | 215 return resources; |
216 } | 216 } |
217 | 217 |
// Parses the body described by |header| from m_lineReader and appends every
// resource found to |resources|.
//
// header:    MIME header of the part being parsed; a null header is logged
//            and treated as a failure.
// resources: output vector. Entries are only appended here; on failure the
//            entries appended so far are left in place for the caller to
//            deal with.
// Returns true when all parts were parsed, false on the first failure.
//
// A non-multipart header yields exactly one resource. A multipart header is
// walked part by part until parseNextPart() sets the end-of-archive flag.
// Nested parts whose content type is exactly multipart/alternative are
// flattened into the same |resources| vector through a recursive call.
218 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<A
rchiveResource>>& resources) | 218 bool MHTMLParser::parseArchiveWithHeader(MIMEHeader* header, HeapVector<Member<A
rchiveResource>>& resources) |
219 { | 219 { |
220 if (!header) { | 220 if (!header) { |
221 DLOG(ERROR) << "Failed to parse MHTML part: no header."; | 221 DLOG(ERROR) << "Failed to parse MHTML part: no header."; |
222 return false; | 222 return false; |
223 } | 223 } |
224 | 224 |
225 if (!header->isMultipart()) { | 225 if (!header->isMultipart()) { |
226 // With IE a page with no resource is not multi-part. | 226 // With IE a page with no resource is not multi-part. |
// Single-part case: both boundary strings are passed empty, and the
// end-of-archive flag written by parseNextPart() is not consulted.
227 bool endOfArchiveReached = false; | 227 bool endOfArchiveReached = false; |
228 RawPtr<ArchiveResource> resource = parseNextPart(*header, String(), Stri
ng(), endOfArchiveReached); | 228 ArchiveResource* resource = parseNextPart(*header, String(), String(), e
ndOfArchiveReached); |
229 if (!resource) | 229 if (!resource) |
230 return false; | 230 return false; |
231 resources.append(resource); | 231 resources.append(resource); |
232 return true; | 232 return true; |
233 } | 233 } |
234 | 234 |
235 // Skip the message content (it's a generic browser specific message). | 235 // Skip the message content (it's a generic browser specific message). |
236 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); | 236 skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); |
237 | 237 |
// Each iteration consumes one part: its MIME header, then its body.
238 bool endOfArchive = false; | 238 bool endOfArchive = false; |
239 while (!endOfArchive) { | 239 while (!endOfArchive) { |
240 RawPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReade
r); | 240 MIMEHeader* resourceHeader = MIMEHeader::parseHeader(&m_lineReader); |
241 if (!resourceHeader) { | 241 if (!resourceHeader) { |
242 DLOG(ERROR) << "Failed to parse MHTML, invalid MIME header."; | 242 DLOG(ERROR) << "Failed to parse MHTML, invalid MIME header."; |
243 return false; | 243 return false; |
244 } | 244 } |
245 if (resourceHeader->contentType() == "multipart/alternative") { | 245 if (resourceHeader->contentType() == "multipart/alternative") { |
246 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). | 246 // Ignore IE nesting which makes little sense (IE seems to nest only
some of the frames). |
// The nested part is parsed with its own header; its resources are
// appended to the same |resources| vector as the outer parts.
247 if (!parseArchiveWithHeader(resourceHeader.get(), resources)) { | 247 if (!parseArchiveWithHeader(resourceHeader, resources)) { |
248 DLOG(ERROR) << "Failed to parse MHTML subframe."; | 248 DLOG(ERROR) << "Failed to parse MHTML subframe."; |
249 return false; | 249 return false; |
250 } | 250 } |
// Resynchronize on the OUTER part boundary before reading the next part.
// NOTE(review): a missing boundary is only asserted here, not handled;
// presumably the assert compiles out in release builds -- confirm.
251 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); | 251 bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, he
ader->endOfPartBoundary()); |
252 ASSERT_UNUSED(endOfPartReached, endOfPartReached); | 252 ASSERT_UNUSED(endOfPartReached, endOfPartReached); |
253 continue; | 253 continue; |
254 } | 254 } |
255 | 255 |
// Regular part: parseNextPart() receives the outer header's boundaries and
// sets |endOfArchive|, which is what terminates this loop.
256 RawPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header
->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); | 256 ArchiveResource* resource = parseNextPart(*resourceHeader, header->endOf
PartBoundary(), header->endOfDocumentBoundary(), endOfArchive); |
257 if (!resource) { | 257 if (!resource) { |
258 DLOG(ERROR) << "Failed to parse MHTML part."; | 258 DLOG(ERROR) << "Failed to parse MHTML part."; |
259 return false; | 259 return false; |
260 } | 260 } |
261 resources.append(resource); | 261 resources.append(resource); |
262 } | 262 } |
263 return true; | 263 return true; |
264 } | 264 } |
265 | 265 |
266 | 266 |
267 RawPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader,
const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& end
OfArchiveReached) | 267 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const
String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchi
veReached) |
268 { | 268 { |
269 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); | 269 ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); |
270 | 270 |
271 // If no content transfer encoding is specified, default to binary encoding. | 271 // If no content transfer encoding is specified, default to binary encoding. |
272 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); | 272 MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEnc
oding(); |
273 if (contentTransferEncoding == MIMEHeader::Unknown) | 273 if (contentTransferEncoding == MIMEHeader::Unknown) |
274 contentTransferEncoding = MIMEHeader::Binary; | 274 contentTransferEncoding = MIMEHeader::Binary; |
275 | 275 |
276 RefPtr<SharedBuffer> content = SharedBuffer::create(); | 276 RefPtr<SharedBuffer> content = SharedBuffer::create(); |
277 const bool checkBoundary = !endOfPartBoundary.isEmpty(); | 277 const bool checkBoundary = !endOfPartBoundary.isEmpty(); |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
370 if (!contentID.startsWith('<') || !contentID.endsWith('>')) | 370 if (!contentID.startsWith('<') || !contentID.endsWith('>')) |
371 return KURL(); | 371 return KURL(); |
372 | 372 |
373 StringBuilder uriBuilder; | 373 StringBuilder uriBuilder; |
374 uriBuilder.append("cid:"); | 374 uriBuilder.append("cid:"); |
375 uriBuilder.append(contentID, 1, contentID.length() - 2); | 375 uriBuilder.append(contentID, 1, contentID.length() - 2); |
376 return KURL(KURL(), uriBuilder.toString()); | 376 return KURL(KURL(), uriBuilder.toString()); |
377 } | 377 } |
378 | 378 |
379 } // namespace blink | 379 } // namespace blink |
OLD | NEW |