Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(108)

Side by Side Diff: third_party/WebKit/Source/web/WebPageSerializer.cpp

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Introduced MHTMLPartsGenerationDelegate interface. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
63 #include "wtf/HashMap.h" 63 #include "wtf/HashMap.h"
64 #include "wtf/HashSet.h" 64 #include "wtf/HashSet.h"
65 #include "wtf/Noncopyable.h" 65 #include "wtf/Noncopyable.h"
66 #include "wtf/Vector.h" 66 #include "wtf/Vector.h"
67 #include "wtf/text/StringConcatenate.h" 67 #include "wtf/text/StringConcatenate.h"
68 68
69 namespace blink { 69 namespace blink {
70 70
71 namespace { 71 namespace {
72 72
73 using ContentIDMap = WillBeHeapHashMap<RawPtrWillBeMember<Frame>, String>; 73 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
74
75 class MHTMLPageSerializerDelegate final :
76 public NoBaseWillBeGarbageCollected<MHTMLPageSerializerDelegate>,
77 public PageSerializer::Delegate {
78 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate); 74 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate);
79 public: 75 public:
80 MHTMLPageSerializerDelegate(const ContentIDMap& frameToContentID); 76 MHTMLPageSerializerDelegate(WebPageSerializer::MHTMLPartsGenerationDelegate&);
dcheng 2015/12/22 01:01:45 explicit
Łukasz Anforowicz 2015/12/22 21:06:46 Oops. Done.
81 bool shouldIgnoreAttribute(const Attribute&) override; 77 bool shouldIgnoreAttribute(const Attribute&) override;
82 bool rewriteLink(const Element&, String& rewrittenLink) override; 78 bool rewriteLink(const Element&, String& rewrittenLink) override;
83 79 bool shouldSkipResource(const KURL&) override;
84 #if ENABLE(OILPAN)
85 void trace(Visitor* visitor) { visitor->trace(m_frameToContentID); }
86 #endif
87 80
88 private: 81 private:
89 const ContentIDMap& m_frameToContentID; 82 WebPageSerializer::MHTMLPartsGenerationDelegate& m_webDelegate;
90 }; 83 };
91 84
92 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate( 85 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
93 const ContentIDMap& frameToContentID) 86 WebPageSerializer::MHTMLPartsGenerationDelegate& webDelegate)
94 : m_frameToContentID(frameToContentID) 87 : m_webDelegate(webDelegate)
95 { 88 {
96 } 89 }
97 90
98 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute) 91 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute)
99 { 92 {
100 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src 93 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src
101 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML 94 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML
102 // plays nicely with srcset. 95 // plays nicely with srcset.
103 return attribute.localName() == HTMLNames::srcsetAttr; 96 return attribute.localName() == HTMLNames::srcsetAttr;
104 } 97 }
105 98
106 bool MHTMLPageSerializerDelegate::rewriteLink( 99 bool MHTMLPageSerializerDelegate::rewriteLink(
107 const Element& element, 100 const Element& element,
108 String& rewrittenLink) 101 String& rewrittenLink)
109 { 102 {
110 if (!element.isFrameOwnerElement()) 103 if (!element.isFrameOwnerElement())
111 return false; 104 return false;
112 105
113 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element); 106 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
114 Frame* frame = frameOwnerElement->contentFrame(); 107 Frame* frame = frameOwnerElement->contentFrame();
115 if (!frame) 108 if (!frame)
116 return false; 109 return false;
117 110
118 KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID.get(frame)); 111 WebString contentID = m_webDelegate.getContentID(*WebFrame::fromFrame(frame));
112 KURL cidURI = MHTMLParser::convertContentIDToURI(contentID);
119 ASSERT(cidURI.isValid()); 113 ASSERT(cidURI.isValid());
120 114
121 if (isHTMLFrameElementBase(&element)) { 115 if (isHTMLFrameElementBase(&element)) {
122 rewrittenLink = cidURI.string(); 116 rewrittenLink = cidURI.string();
123 return true; 117 return true;
124 } 118 }
125 119
126 if (isHTMLObjectElement(&element)) { 120 if (isHTMLObjectElement(&element)) {
127 Document* doc = frameOwnerElement->contentDocument(); 121 Document* doc = frameOwnerElement->contentDocument();
128 bool isHandledBySerializer = doc->isHTMLDocument() 122 bool isHandledBySerializer = doc->isHTMLDocument()
129 || doc->isXHTMLDocument() || doc->isImageDocument(); 123 || doc->isXHTMLDocument() || doc->isImageDocument();
130 if (isHandledBySerializer) { 124 if (isHandledBySerializer) {
131 rewrittenLink = cidURI.string(); 125 rewrittenLink = cidURI.string();
132 return true; 126 return true;
133 } 127 }
134 } 128 }
135 129
136 return false; 130 return false;
137 } 131 }
138 132
139 ContentIDMap createFrameToContentIDMap( 133 bool MHTMLPageSerializerDelegate::shouldSkipResource(const KURL& url)
140 const WebVector<std::pair<WebFrame*, WebString>>& webFrameToContentID)
141 { 134 {
142 ContentIDMap result; 135 return m_webDelegate.shouldSkipResource(url);
143 for (const auto& it : webFrameToContentID) {
144 WebFrame* webFrame = it.first;
145 const WebString& webContentID = it.second;
146
147 Frame* frame = webFrame->toImplBase()->frame();
148 String contentID(webContentID);
149
150 result.add(frame, contentID);
151 }
152 return result;
153 } 136 }
154 137
155 } // namespace 138 } // namespace
156 139
157 WebData WebPageSerializer::generateMHTMLHeader( 140 WebData WebPageSerializer::generateMHTMLHeader(
158 const WebString& boundary, WebLocalFrame* frame) 141 const WebString& boundary, WebLocalFrame* frame)
159 { 142 {
160 Document* document = toWebLocalFrameImpl(frame)->frame()->document(); 143 Document* document = toWebLocalFrameImpl(frame)->frame()->document();
161 144
162 RefPtr<SharedBuffer> buffer = SharedBuffer::create(); 145 RefPtr<SharedBuffer> buffer = SharedBuffer::create();
163 MHTMLArchive::generateMHTMLHeader( 146 MHTMLArchive::generateMHTMLHeader(
164 boundary, document->title(), document->suggestedMIMEType(), 147 boundary, document->title(), document->suggestedMIMEType(),
165 *buffer); 148 *buffer);
166 return buffer.release(); 149 return buffer.release();
167 } 150 }
168 151
169 WebData WebPageSerializer::generateMHTMLParts( 152 WebData WebPageSerializer::generateMHTMLParts(
170 const WebString& boundary, WebLocalFrame* webFrame, bool useBinaryEncoding, 153 const WebString& boundary, WebLocalFrame* webFrame, bool useBinaryEncoding,
171 const WebVector<std::pair<WebFrame*, WebString>>& webFrameToContentID) 154 MHTMLPartsGenerationDelegate& webDelegate)
172 { 155 {
173 // Translate arguments from public to internal blink APIs. 156 // Translate arguments from public to internal blink APIs.
174 LocalFrame* frame = toWebLocalFrameImpl(webFrame)->frame(); 157 LocalFrame* frame = toWebLocalFrameImpl(webFrame)->frame();
175 MHTMLArchive::EncodingPolicy encodingPolicy = useBinaryEncoding 158 MHTMLArchive::EncodingPolicy encodingPolicy = useBinaryEncoding
176 ? MHTMLArchive::EncodingPolicy::UseBinaryEncoding 159 ? MHTMLArchive::EncodingPolicy::UseBinaryEncoding
177 : MHTMLArchive::EncodingPolicy::UseDefaultEncoding; 160 : MHTMLArchive::EncodingPolicy::UseDefaultEncoding;
178 ContentIDMap frameToContentID = createFrameToContentIDMap(webFrameToContentID);
179 161
180 // Serialize. 162 // Serialize.
181 Vector<SerializedResource> resources; 163 Vector<SerializedResource> resources;
182 MHTMLPageSerializerDelegate delegate(frameToContentID); 164 MHTMLPageSerializerDelegate coreDelegate(webDelegate);
183 PageSerializer serializer(resources, &delegate); 165 PageSerializer serializer(resources, &coreDelegate);
184 serializer.serializeFrame(*frame); 166 serializer.serializeFrame(*frame);
185 167
168 // Get Content-ID for the frame being serialized.
169 String frameContentID = webDelegate.getContentID(*webFrame);
170 ASSERT(!frameContentID.isEmpty());
171
186 // Encode serializer's output as MHTML. 172 // Encode serializer's output as MHTML.
187 RefPtr<SharedBuffer> output = SharedBuffer::create(); 173 RefPtr<SharedBuffer> output = SharedBuffer::create();
188 bool isFirstResource = true; 174 bool isFirstResource = true;
189 for (const SerializedResource& resource : resources) { 175 for (const SerializedResource& resource : resources) {
190 // Frame is the 1st resource (see PageSerializer::serializeFrame doc 176 // Frame is the 1st resource (see PageSerializer::serializeFrame doc
191 // comment). Frames need a Content-ID header. 177 // comment). Frames get a Content-ID header.
192 String contentID = isFirstResource ? frameToContentID.get(frame) : String(); 178 String contentID = isFirstResource ? frameContentID : String();
193 179
194 MHTMLArchive::generateMHTMLPart( 180 MHTMLArchive::generateMHTMLPart(
195 boundary, contentID, encodingPolicy, resource, *output); 181 boundary, contentID, encodingPolicy, resource, *output);
196 182
197 isFirstResource = false; 183 isFirstResource = false;
198 } 184 }
199 return output.release(); 185 return output.release();
200 } 186 }
201 187
202 WebData WebPageSerializer::generateMHTMLFooter(const WebString& boundary) 188 WebData WebPageSerializer::generateMHTMLFooter(const WebString& boundary)
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
234 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) 220 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
235 { 221 {
236 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|. 222 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|.
237 if (baseTarget.isEmpty()) 223 if (baseTarget.isEmpty())
238 return String("<base href=\".\">"); 224 return String("<base href=\".\">");
239 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">"; 225 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">";
240 return baseString; 226 return baseString;
241 } 227 }
242 228
243 } // namespace blink 229 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698