Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: third_party/WebKit/Source/web/WebPageSerializer.cpp

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Addressed CR feedback from rdsmith@. Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
62 #include "wtf/HashMap.h" 62 #include "wtf/HashMap.h"
63 #include "wtf/HashSet.h" 63 #include "wtf/HashSet.h"
64 #include "wtf/Noncopyable.h" 64 #include "wtf/Noncopyable.h"
65 #include "wtf/Vector.h" 65 #include "wtf/Vector.h"
66 #include "wtf/text/StringConcatenate.h" 66 #include "wtf/text/StringConcatenate.h"
67 67
68 namespace blink { 68 namespace blink {
69 69
70 namespace { 70 namespace {
71 71
72 using ContentIDMap = WillBeHeapHashMap<RawPtrWillBeMember<Frame>, String>; 72 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
73
74 class MHTMLPageSerializerDelegate final :
75 public NoBaseWillBeGarbageCollected<MHTMLPageSerializerDelegate>,
76 public PageSerializer::Delegate {
77 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate); 73 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate);
78 public: 74 public:
79 MHTMLPageSerializerDelegate(const ContentIDMap& frameToContentID); 75 explicit MHTMLPageSerializerDelegate(WebPageSerializer::MHTMLPartsGenerationDelegate&);
80 bool shouldIgnoreAttribute(const Attribute&) override; 76 bool shouldIgnoreAttribute(const Attribute&) override;
81 bool rewriteLink(const Element&, String& rewrittenLink) override; 77 bool rewriteLink(const Element&, String& rewrittenLink) override;
82 78 bool shouldSkipResource(const KURL&) override;
83 #if ENABLE(OILPAN)
84 void trace(Visitor* visitor) { visitor->trace(m_frameToContentID); }
85 #endif
86 79
87 private: 80 private:
88 const ContentIDMap& m_frameToContentID; 81 WebPageSerializer::MHTMLPartsGenerationDelegate& m_webDelegate;
89 }; 82 };
90 83
91 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate( 84 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
92 const ContentIDMap& frameToContentID) 85 WebPageSerializer::MHTMLPartsGenerationDelegate& webDelegate)
93 : m_frameToContentID(frameToContentID) 86 : m_webDelegate(webDelegate)
94 { 87 {
95 } 88 }
96 89
97 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute) 90 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute)
98 { 91 {
99 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src 92 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src
100 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML 93 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML
101 // plays nicely with srcset. 94 // plays nicely with srcset.
102 return attribute.localName() == HTMLNames::srcsetAttr; 95 return attribute.localName() == HTMLNames::srcsetAttr;
103 } 96 }
104 97
105 bool MHTMLPageSerializerDelegate::rewriteLink( 98 bool MHTMLPageSerializerDelegate::rewriteLink(
106 const Element& element, 99 const Element& element,
107 String& rewrittenLink) 100 String& rewrittenLink)
108 { 101 {
109 if (!element.isFrameOwnerElement()) 102 if (!element.isFrameOwnerElement())
110 return false; 103 return false;
111 104
112 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element); 105 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
113 Frame* frame = frameOwnerElement->contentFrame(); 106 Frame* frame = frameOwnerElement->contentFrame();
114 if (!frame) 107 if (!frame)
115 return false; 108 return false;
116 109
117 KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID.get(frame)); 110 WebString contentID = m_webDelegate.getContentID(*WebFrame::fromFrame(frame));
111 KURL cidURI = MHTMLParser::convertContentIDToURI(contentID);
118 ASSERT(cidURI.isValid()); 112 ASSERT(cidURI.isValid());
119 113
120 if (isHTMLFrameElementBase(&element)) { 114 if (isHTMLFrameElementBase(&element)) {
121 rewrittenLink = cidURI.string(); 115 rewrittenLink = cidURI.string();
122 return true; 116 return true;
123 } 117 }
124 118
125 if (isHTMLObjectElement(&element)) { 119 if (isHTMLObjectElement(&element)) {
126 Document* doc = frameOwnerElement->contentDocument(); 120 Document* doc = frameOwnerElement->contentDocument();
127 bool isHandledBySerializer = doc->isHTMLDocument() 121 bool isHandledBySerializer = doc->isHTMLDocument()
128 || doc->isXHTMLDocument() || doc->isImageDocument(); 122 || doc->isXHTMLDocument() || doc->isImageDocument();
129 if (isHandledBySerializer) { 123 if (isHandledBySerializer) {
130 rewrittenLink = cidURI.string(); 124 rewrittenLink = cidURI.string();
131 return true; 125 return true;
132 } 126 }
133 } 127 }
134 128
135 return false; 129 return false;
136 } 130 }
137 131
138 ContentIDMap createFrameToContentIDMap( 132 bool MHTMLPageSerializerDelegate::shouldSkipResource(const KURL& url)
139 const WebVector<std::pair<WebFrame*, WebString>>& webFrameToContentID)
140 { 133 {
141 ContentIDMap result; 134 return m_webDelegate.shouldSkipResource(url);
142 for (const auto& it : webFrameToContentID) {
143 WebFrame* webFrame = it.first;
144 const WebString& webContentID = it.second;
145
146 Frame* frame = webFrame->toImplBase()->frame();
147 String contentID(webContentID);
148
149 result.add(frame, contentID);
150 }
151 return result;
152 } 135 }
153 136
154 } // namespace 137 } // namespace
155 138
156 WebData WebPageSerializer::generateMHTMLHeader( 139 WebData WebPageSerializer::generateMHTMLHeader(
157 const WebString& boundary, WebLocalFrame* frame) 140 const WebString& boundary, WebLocalFrame* frame)
158 { 141 {
159 Document* document = toWebLocalFrameImpl(frame)->frame()->document(); 142 Document* document = toWebLocalFrameImpl(frame)->frame()->document();
160 143
161 RefPtr<SharedBuffer> buffer = SharedBuffer::create(); 144 RefPtr<SharedBuffer> buffer = SharedBuffer::create();
162 MHTMLArchive::generateMHTMLHeader( 145 MHTMLArchive::generateMHTMLHeader(
163 boundary, document->title(), document->suggestedMIMEType(), 146 boundary, document->title(), document->suggestedMIMEType(),
164 *buffer); 147 *buffer);
165 return buffer.release(); 148 return buffer.release();
166 } 149 }
167 150
168 WebData WebPageSerializer::generateMHTMLParts( 151 WebData WebPageSerializer::generateMHTMLParts(
169 const WebString& boundary, WebLocalFrame* webFrame, bool useBinaryEncoding, 152 const WebString& boundary, WebLocalFrame* webFrame, bool useBinaryEncoding,
170 const WebVector<std::pair<WebFrame*, WebString>>& webFrameToContentID) 153 MHTMLPartsGenerationDelegate* webDelegate)
171 { 154 {
155 ASSERT(webFrame);
156 ASSERT(webDelegate);
157
172 // Translate arguments from public to internal blink APIs. 158 // Translate arguments from public to internal blink APIs.
173 LocalFrame* frame = toWebLocalFrameImpl(webFrame)->frame(); 159 LocalFrame* frame = toWebLocalFrameImpl(webFrame)->frame();
174 MHTMLArchive::EncodingPolicy encodingPolicy = useBinaryEncoding 160 MHTMLArchive::EncodingPolicy encodingPolicy = useBinaryEncoding
175 ? MHTMLArchive::EncodingPolicy::UseBinaryEncoding 161 ? MHTMLArchive::EncodingPolicy::UseBinaryEncoding
176 : MHTMLArchive::EncodingPolicy::UseDefaultEncoding; 162 : MHTMLArchive::EncodingPolicy::UseDefaultEncoding;
177 ContentIDMap frameToContentID = createFrameToContentIDMap(webFrameToContentID);
178 163
179 // Serialize. 164 // Serialize.
180 Vector<SerializedResource> resources; 165 Vector<SerializedResource> resources;
181 MHTMLPageSerializerDelegate delegate(frameToContentID); 166 MHTMLPageSerializerDelegate coreDelegate(*webDelegate);
182 PageSerializer serializer(resources, &delegate); 167 PageSerializer serializer(resources, coreDelegate);
183 serializer.serializeFrame(*frame); 168 serializer.serializeFrame(*frame);
184 169
170 // Get Content-ID for the frame being serialized.
171 String frameContentID = webDelegate->getContentID(*webFrame);
172 ASSERT(!frameContentID.isEmpty());
173
185 // Encode serializer's output as MHTML. 174 // Encode serializer's output as MHTML.
186 RefPtr<SharedBuffer> output = SharedBuffer::create(); 175 RefPtr<SharedBuffer> output = SharedBuffer::create();
187 bool isFirstResource = true; 176 bool isFirstResource = true;
188 for (const SerializedResource& resource : resources) { 177 for (const SerializedResource& resource : resources) {
189 // Frame is the 1st resource (see PageSerializer::serializeFrame doc 178 // Frame is the 1st resource (see PageSerializer::serializeFrame doc
190 // comment). Frames need a Content-ID header. 179 // comment). Frames get a Content-ID header.
191 String contentID = isFirstResource ? frameToContentID.get(frame) : String(); 180 String contentID = isFirstResource ? frameContentID : String();
192 181
193 MHTMLArchive::generateMHTMLPart( 182 MHTMLArchive::generateMHTMLPart(
194 boundary, contentID, encodingPolicy, resource, *output); 183 boundary, contentID, encodingPolicy, resource, *output);
195 184
196 isFirstResource = false; 185 isFirstResource = false;
197 } 186 }
198 return output.release(); 187 return output.release();
199 } 188 }
200 189
201 WebData WebPageSerializer::generateMHTMLFooter(const WebString& boundary) 190 WebData WebPageSerializer::generateMHTMLFooter(const WebString& boundary)
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) 222 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
234 { 223 {
235 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|. 224 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|.
236 if (baseTarget.isEmpty()) 225 if (baseTarget.isEmpty())
237 return String("<base href=\".\">"); 226 return String("<base href=\".\">");
238 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">"; 227 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">";
239 return baseString; 228 return baseString;
240 } 229 }
241 230
242 } // namespace blink 231 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698