Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(226)

Side by Side Diff: third_party/WebKit/Source/web/WebPageSerializer.cpp

Issue 1441553002: Generating CIDs in Blink during MHTML serialization. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-per-frame-page-serializer-only
Patch Set: Replaced initializer lists with array initialization. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 27 matching lines...) Expand all
38 #include "core/html/HTMLAllCollection.h" 38 #include "core/html/HTMLAllCollection.h"
39 #include "core/html/HTMLFrameElementBase.h" 39 #include "core/html/HTMLFrameElementBase.h"
40 #include "core/html/HTMLFrameOwnerElement.h" 40 #include "core/html/HTMLFrameOwnerElement.h"
41 #include "core/html/HTMLInputElement.h" 41 #include "core/html/HTMLInputElement.h"
42 #include "core/html/HTMLTableElement.h" 42 #include "core/html/HTMLTableElement.h"
43 #include "core/loader/DocumentLoader.h" 43 #include "core/loader/DocumentLoader.h"
44 #include "core/page/Page.h" 44 #include "core/page/Page.h"
45 #include "core/page/PageSerializer.h" 45 #include "core/page/PageSerializer.h"
46 #include "platform/SerializedResource.h" 46 #include "platform/SerializedResource.h"
47 #include "platform/mhtml/MHTMLArchive.h" 47 #include "platform/mhtml/MHTMLArchive.h"
48 #include "platform/mhtml/MHTMLParser.h"
48 #include "platform/weborigin/KURL.h" 49 #include "platform/weborigin/KURL.h"
49 #include "public/platform/WebCString.h" 50 #include "public/platform/WebCString.h"
50 #include "public/platform/WebString.h" 51 #include "public/platform/WebString.h"
51 #include "public/platform/WebURL.h" 52 #include "public/platform/WebURL.h"
52 #include "public/platform/WebVector.h" 53 #include "public/platform/WebVector.h"
53 #include "public/web/WebFrame.h" 54 #include "public/web/WebFrame.h"
54 #include "public/web/WebPageSerializerClient.h" 55 #include "public/web/WebPageSerializerClient.h"
55 #include "public/web/WebView.h" 56 #include "public/web/WebView.h"
56 #include "web/WebLocalFrameImpl.h" 57 #include "web/WebLocalFrameImpl.h"
57 #include "web/WebPageSerializerImpl.h" 58 #include "web/WebPageSerializerImpl.h"
58 #include "web/WebViewImpl.h" 59 #include "web/WebViewImpl.h"
60 #include "wtf/Assertions.h"
61 #include "wtf/HashMap.h"
62 #include "wtf/Noncopyable.h"
59 #include "wtf/Vector.h" 63 #include "wtf/Vector.h"
60 #include "wtf/text/StringConcatenate.h" 64 #include "wtf/text/StringConcatenate.h"
61 65
62 namespace blink { 66 namespace blink {
63 67
64 namespace { 68 namespace {
65 69
66 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate { 70 using ContentIDMap = WillBeHeapHashMap<RawPtrWillBeMember<Frame>, String>;
71
72 class MHTMLPageSerializerDelegate final :
73 public NoBaseWillBeGarbageCollected<MHTMLPageSerializerDelegate>,
74 public PageSerializer::Delegate {
75 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate);
67 public: 76 public:
68 ~MHTMLPageSerializerDelegate() override; 77 MHTMLPageSerializerDelegate(const ContentIDMap& frameToContentID);
69 bool shouldIgnoreAttribute(const Attribute&) override; 78 bool shouldIgnoreAttribute(const Attribute&) override;
79 bool rewriteLink(const Element&, String& rewrittenLink) override;
80
81 #if ENABLE(OILPAN)
82 void trace(Visitor* visitor) { visitor->trace(m_frameToContentID); }
83 #endif
84
85 private:
86 const ContentIDMap& m_frameToContentID;
70 }; 87 };
71 88
72 89 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
73 MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate() 90 const ContentIDMap& frameToContentID)
91 : m_frameToContentID(frameToContentID)
74 { 92 {
75 } 93 }
76 94
77 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute) 95 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute)
78 { 96 {
79 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src 97 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src
80 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML 98 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML
81 // plays nicely with srcset. 99 // plays nicely with srcset.
82 return attribute.localName() == HTMLNames::srcsetAttr; 100 return attribute.localName() == HTMLNames::srcsetAttr;
83 } 101 }
84 102
85 } // namespace 103 bool MHTMLPageSerializerDelegate::rewriteLink(
104 const Element& element,
105 String& rewrittenLink)
106 {
107 if (!element.isFrameOwnerElement())
108 return false;
86 109
87 static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::EncodingPolicy encodingPolicy) 110 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
111 Frame* frame = frameOwnerElement->contentFrame();
112 if (!frame)
113 return false;
114
115 KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID.get(frame));
116 ASSERT(cidURI.isValid());
117
118 if (isHTMLFrameElementBase(&element)) {
119 rewrittenLink = cidURI.string();
120 return true;
121 }
122
123 if (isHTMLObjectElement(&element)) {
124 Document* doc = frameOwnerElement->contentDocument();
125 bool isHandledBySerializer = doc->isHTMLDocument()
126 || doc->isXHTMLDocument() || doc->isImageDocument();
127 if (isHandledBySerializer) {
128 rewrittenLink = cidURI.string();
129 return true;
130 }
131 }
132
133 return false;
134 }
135
136 ContentIDMap generateFrameContentIDs(Page* page)
137 {
138 ContentIDMap frameToContentID;
139 int frameID = 0;
140 for (Frame* frame = page->mainFrame(); frame; frame = frame->tree().traverseNext()) {
141 // TODO(lukasza): Move cid generation to the browser + use base/guid.h
142 // (see the draft at crrev.com/1386873003).
143 StringBuilder contentIDBuilder;
144 contentIDBuilder.appendLiteral("<frame");
145 contentIDBuilder.appendNumber(frameID++);
146 contentIDBuilder.appendLiteral("@mhtml.blink>");
147
148 frameToContentID.add(frame, contentIDBuilder.toString());
149 }
150 return frameToContentID;
151 }
152
153 PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::EncodingPolicy encodingPolicy)
88 { 154 {
89 Vector<SerializedResource> resources; 155 Vector<SerializedResource> resources;
90 PageSerializer serializer(&resources, adoptPtr(new MHTMLPageSerializerDelegate)); 156 ContentIDMap frameToContentID = generateFrameContentIDs(page);
157 MHTMLPageSerializerDelegate delegate(frameToContentID);
158 PageSerializer serializer(resources, &delegate);
91 159
92 RefPtr<SharedBuffer> output = SharedBuffer::create(); 160 RefPtr<SharedBuffer> output = SharedBuffer::create();
93 String boundary = MHTMLArchive::generateMHTMLBoundary(); 161 String boundary = MHTMLArchive::generateMHTMLBoundary();
94 162
95 Document* document = page->deprecatedLocalMainFrame()->document(); 163 Document* document = page->deprecatedLocalMainFrame()->document();
96 MHTMLArchive::generateMHTMLHeader( 164 MHTMLArchive::generateMHTMLHeader(
97 boundary, document->title(), document->suggestedMIMEType(), *output); 165 boundary, document->title(), document->suggestedMIMEType(), *output);
98 166
99 for (Frame* frame = page->deprecatedLocalMainFrame(); frame; frame = frame->tree().traverseNext()) { 167 for (Frame* frame = page->deprecatedLocalMainFrame(); frame; frame = frame->tree().traverseNext()) {
100 // TODO(lukasza): This causes incomplete MHTML for OOPIFs. 168 // TODO(lukasza): This causes incomplete MHTML for OOPIFs.
101 // (crbug.com/538766) 169 // (crbug.com/538766)
102 if (!frame->isLocalFrame()) 170 if (!frame->isLocalFrame())
103 continue; 171 continue;
104 172
105 resources.clear(); 173 resources.clear();
106 serializer.serializeFrame(*toLocalFrame(frame)); 174 serializer.serializeFrame(*toLocalFrame(frame));
107 175
108 for (const auto& resource : resources) { 176 bool isFirstResource = true;
177 for (const SerializedResource& resource : resources) {
178 // Frame is the 1st resource (see PageSerializer::serializeFrame doc
179 // comment). Frames need a Content-ID header.
180 String contentID = isFirstResource ? frameToContentID.get(frame) : String();
181
109 MHTMLArchive::generateMHTMLPart( 182 MHTMLArchive::generateMHTMLPart(
110 boundary, encodingPolicy, resource, *output); 183 boundary, contentID, encodingPolicy, resource, *output);
184
185 isFirstResource = false;
111 } 186 }
112 } 187 }
113 188
114 MHTMLArchive::generateMHTMLFooter(boundary, *output); 189 MHTMLArchive::generateMHTMLFooter(boundary, *output);
115 return output.release(); 190 return output.release();
116 } 191 }
117 192
193 } // namespace
194
118 WebCString WebPageSerializer::serializeToMHTML(WebView* view) 195 WebCString WebPageSerializer::serializeToMHTML(WebView* view)
119 { 196 {
120 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page(), MHTMLArchive::UseDefaultEncoding); 197 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page(), MHTMLArchive::UseDefaultEncoding);
121 // FIXME: we are copying all the data here. Idealy we would have a WebSharedData(). 198 // FIXME: we are copying all the data here. Idealy we would have a WebSharedData().
122 return WebCString(mhtml->data(), mhtml->size()); 199 return WebCString(mhtml->data(), mhtml->size());
123 } 200 }
124 201
125 WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view) 202 WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
126 { 203 {
127 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page(), MHTMLArchive::UseBinaryEncoding); 204 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page(), MHTMLArchive::UseBinaryEncoding);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
159 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) 236 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
160 { 237 {
161 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|. 238 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|.
162 if (baseTarget.isEmpty()) 239 if (baseTarget.isEmpty())
163 return String("<base href=\".\">"); 240 return String("<base href=\".\">");
164 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">"; 241 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">";
165 return baseString; 242 return baseString;
166 } 243 }
167 244
168 } // namespace blink 245 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/mhtml/MHTMLParser.cpp ('k') | third_party/WebKit/Source/web/tests/MHTMLTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698