OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2009 Google Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 #include "public/web/WebPageSerializer.h" | |
32 | |
33 #include "core/HTMLNames.h" | |
34 #include "core/dom/Document.h" | |
35 #include "core/dom/Element.h" | |
36 #include "core/frame/Frame.h" | |
37 #include "core/frame/LocalFrame.h" | |
38 #include "core/frame/RemoteFrame.h" | |
39 #include "core/html/HTMLAllCollection.h" | |
40 #include "core/html/HTMLFrameElementBase.h" | |
41 #include "core/html/HTMLFrameOwnerElement.h" | |
42 #include "core/html/HTMLInputElement.h" | |
43 #include "core/html/HTMLTableElement.h" | |
44 #include "core/loader/DocumentLoader.h" | |
45 #include "core/page/PageSerializer.h" | |
46 #include "platform/SerializedResource.h" | |
47 #include "platform/SharedBuffer.h" | |
48 #include "platform/mhtml/MHTMLArchive.h" | |
49 #include "platform/mhtml/MHTMLParser.h" | |
50 #include "platform/weborigin/KURL.h" | |
51 #include "public/platform/WebCString.h" | |
52 #include "public/platform/WebString.h" | |
53 #include "public/platform/WebURL.h" | |
54 #include "public/platform/WebVector.h" | |
55 #include "public/web/WebDocument.h" | |
56 #include "public/web/WebFrame.h" | |
57 #include "public/web/WebPageSerializerClient.h" | |
58 #include "web/WebLocalFrameImpl.h" | |
59 #include "web/WebPageSerializerImpl.h" | |
60 #include "web/WebRemoteFrameImpl.h" | |
61 #include "wtf/Assertions.h" | |
62 #include "wtf/HashMap.h" | |
63 #include "wtf/HashSet.h" | |
64 #include "wtf/Noncopyable.h" | |
65 #include "wtf/Vector.h" | |
66 #include "wtf/text/StringConcatenate.h" | |
67 | |
68 namespace blink { | |
69 | |
70 namespace { | |
71 | |
72 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate { | |
73 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate); | |
74 public: | |
75 explicit MHTMLPageSerializerDelegate(WebPageSerializer::MHTMLPartsGeneration
Delegate&); | |
76 bool shouldIgnoreAttribute(const Attribute&) override; | |
77 bool rewriteLink(const Element&, String& rewrittenLink) override; | |
78 bool shouldSkipResource(const KURL&) override; | |
79 | |
80 private: | |
81 WebPageSerializer::MHTMLPartsGenerationDelegate& m_webDelegate; | |
82 }; | |
83 | |
84 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate( | |
85 WebPageSerializer::MHTMLPartsGenerationDelegate& webDelegate) | |
86 : m_webDelegate(webDelegate) | |
87 { | |
88 } | |
89 | |
90 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribu
te) | |
91 { | |
92 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display i
mages, as only the value of src | |
93 // is pulled into the archive. Discarding srcset prevents the problem. Long
term we should make sure to MHTML | |
94 // plays nicely with srcset. | |
95 return attribute.localName() == HTMLNames::srcsetAttr; | |
96 } | |
97 | |
98 bool MHTMLPageSerializerDelegate::rewriteLink( | |
99 const Element& element, | |
100 String& rewrittenLink) | |
101 { | |
102 if (!element.isFrameOwnerElement()) | |
103 return false; | |
104 | |
105 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element); | |
106 Frame* frame = frameOwnerElement->contentFrame(); | |
107 if (!frame) | |
108 return false; | |
109 | |
110 WebString contentID = m_webDelegate.getContentID(*WebFrame::fromFrame(frame)
); | |
111 KURL cidURI = MHTMLParser::convertContentIDToURI(contentID); | |
112 ASSERT(cidURI.isValid()); | |
113 | |
114 if (isHTMLFrameElementBase(&element)) { | |
115 rewrittenLink = cidURI.string(); | |
116 return true; | |
117 } | |
118 | |
119 if (isHTMLObjectElement(&element)) { | |
120 Document* doc = frameOwnerElement->contentDocument(); | |
121 bool isHandledBySerializer = doc->isHTMLDocument() | |
122 || doc->isXHTMLDocument() || doc->isImageDocument(); | |
123 if (isHandledBySerializer) { | |
124 rewrittenLink = cidURI.string(); | |
125 return true; | |
126 } | |
127 } | |
128 | |
129 return false; | |
130 } | |
131 | |
132 bool MHTMLPageSerializerDelegate::shouldSkipResource(const KURL& url) | |
133 { | |
134 return m_webDelegate.shouldSkipResource(url); | |
135 } | |
136 | |
137 } // namespace | |
138 | |
139 WebData WebPageSerializer::generateMHTMLHeader( | |
140 const WebString& boundary, WebLocalFrame* frame) | |
141 { | |
142 Document* document = toWebLocalFrameImpl(frame)->frame()->document(); | |
143 | |
144 RefPtr<SharedBuffer> buffer = SharedBuffer::create(); | |
145 MHTMLArchive::generateMHTMLHeader( | |
146 boundary, document->title(), document->suggestedMIMEType(), | |
147 *buffer); | |
148 return buffer.release(); | |
149 } | |
150 | |
151 WebData WebPageSerializer::generateMHTMLParts( | |
152 const WebString& boundary, WebLocalFrame* webFrame, bool useBinaryEncoding, | |
153 MHTMLPartsGenerationDelegate* webDelegate) | |
154 { | |
155 ASSERT(webFrame); | |
156 ASSERT(webDelegate); | |
157 | |
158 // Translate arguments from public to internal blink APIs. | |
159 LocalFrame* frame = toWebLocalFrameImpl(webFrame)->frame(); | |
160 MHTMLArchive::EncodingPolicy encodingPolicy = useBinaryEncoding | |
161 ? MHTMLArchive::EncodingPolicy::UseBinaryEncoding | |
162 : MHTMLArchive::EncodingPolicy::UseDefaultEncoding; | |
163 | |
164 // Serialize. | |
165 Vector<SerializedResource> resources; | |
166 MHTMLPageSerializerDelegate coreDelegate(*webDelegate); | |
167 PageSerializer serializer(resources, coreDelegate); | |
168 serializer.serializeFrame(*frame); | |
169 | |
170 // Get Content-ID for the frame being serialized. | |
171 String frameContentID = webDelegate->getContentID(*webFrame); | |
172 ASSERT(!frameContentID.isEmpty()); | |
173 | |
174 // Encode serializer's output as MHTML. | |
175 RefPtr<SharedBuffer> output = SharedBuffer::create(); | |
176 bool isFirstResource = true; | |
177 for (const SerializedResource& resource : resources) { | |
178 // Frame is the 1st resource (see PageSerializer::serializeFrame doc | |
179 // comment). Frames get a Content-ID header. | |
180 String contentID = isFirstResource ? frameContentID : String(); | |
181 | |
182 MHTMLArchive::generateMHTMLPart( | |
183 boundary, contentID, encodingPolicy, resource, *output); | |
184 | |
185 isFirstResource = false; | |
186 } | |
187 return output.release(); | |
188 } | |
189 | |
190 WebData WebPageSerializer::generateMHTMLFooter(const WebString& boundary) | |
191 { | |
192 RefPtr<SharedBuffer> buffer = SharedBuffer::create(); | |
193 MHTMLArchive::generateMHTMLFooter(boundary, *buffer); | |
194 return buffer.release(); | |
195 } | |
196 | |
197 bool WebPageSerializer::serialize( | |
198 WebLocalFrame* frame, | |
199 WebPageSerializerClient* client, | |
200 const WebVector<std::pair<WebURL, WebString>>& urlsToLocalPaths) | |
201 { | |
202 WebPageSerializerImpl serializerImpl(frame, client, urlsToLocalPaths); | |
203 return serializerImpl.serialize(); | |
204 } | |
205 | |
206 WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& cha
rset) | |
207 { | |
208 // TODO(yosin) We should call |PageSerializer::metaCharsetDeclarationOf()|. | |
209 String charsetString = "<meta http-equiv=\"Content-Type\" content=\"text/htm
l; charset=" + static_cast<const String&>(charset) + "\">"; | |
210 return charsetString; | |
211 } | |
212 | |
213 WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url) | |
214 { | |
215 StringBuilder builder; | |
216 builder.append("\n<!-- "); | |
217 builder.append(PageSerializer::markOfTheWebDeclaration(url)); | |
218 builder.append(" -->\n"); | |
219 return builder.toString(); | |
220 } | |
221 | |
222 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTar
get) | |
223 { | |
224 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|. | |
225 if (baseTarget.isEmpty()) | |
226 return String("<base href=\".\">"); | |
227 String baseString = "<base href=\".\" target=\"" + static_cast<const String&
>(baseTarget) + "\">"; | |
228 return baseString; | |
229 } | |
230 | |
231 } // namespace blink | |
OLD | NEW |