Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: third_party/WebKit/Source/web/WebPageSerializer.cpp

Issue 1441553002: Generating CIDs in Blink during MHTML serialization. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-per-frame-page-serializer-only
Patch Set: Addressed more CR feedback from yosin@. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 27 matching lines...) Expand all
38 #include "core/html/HTMLAllCollection.h" 38 #include "core/html/HTMLAllCollection.h"
39 #include "core/html/HTMLFrameElementBase.h" 39 #include "core/html/HTMLFrameElementBase.h"
40 #include "core/html/HTMLFrameOwnerElement.h" 40 #include "core/html/HTMLFrameOwnerElement.h"
41 #include "core/html/HTMLInputElement.h" 41 #include "core/html/HTMLInputElement.h"
42 #include "core/html/HTMLTableElement.h" 42 #include "core/html/HTMLTableElement.h"
43 #include "core/loader/DocumentLoader.h" 43 #include "core/loader/DocumentLoader.h"
44 #include "core/page/Page.h" 44 #include "core/page/Page.h"
45 #include "core/page/PageSerializer.h" 45 #include "core/page/PageSerializer.h"
46 #include "platform/SerializedResource.h" 46 #include "platform/SerializedResource.h"
47 #include "platform/mhtml/MHTMLArchive.h" 47 #include "platform/mhtml/MHTMLArchive.h"
48 #include "platform/mhtml/MHTMLParser.h"
48 #include "platform/weborigin/KURL.h" 49 #include "platform/weborigin/KURL.h"
49 #include "public/platform/WebCString.h" 50 #include "public/platform/WebCString.h"
50 #include "public/platform/WebString.h" 51 #include "public/platform/WebString.h"
51 #include "public/platform/WebURL.h" 52 #include "public/platform/WebURL.h"
52 #include "public/platform/WebVector.h" 53 #include "public/platform/WebVector.h"
53 #include "public/web/WebFrame.h" 54 #include "public/web/WebFrame.h"
54 #include "public/web/WebPageSerializerClient.h" 55 #include "public/web/WebPageSerializerClient.h"
55 #include "public/web/WebView.h" 56 #include "public/web/WebView.h"
56 #include "web/WebLocalFrameImpl.h" 57 #include "web/WebLocalFrameImpl.h"
57 #include "web/WebPageSerializerImpl.h" 58 #include "web/WebPageSerializerImpl.h"
58 #include "web/WebViewImpl.h" 59 #include "web/WebViewImpl.h"
60 #include "wtf/Assertions.h"
61 #include "wtf/HashMap.h"
59 #include "wtf/Vector.h" 62 #include "wtf/Vector.h"
60 #include "wtf/text/StringConcatenate.h" 63 #include "wtf/text/StringConcatenate.h"
61 64
62 namespace blink { 65 namespace blink {
63 66
64 namespace { 67 namespace {
65 68
66 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate { 69 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
67 public: 70 public:
68 ~MHTMLPageSerializerDelegate() override; 71 MHTMLPageSerializerDelegate(HashMap<Frame*, String>* frameToContentID);
69 bool shouldIgnoreAttribute(const Attribute&) override; 72 bool shouldIgnoreAttribute(const Attribute&) override;
73 String rewriteLink(const Element&) override;
74 private:
75 HashMap<Frame*, String>* m_frameToContentID;
70 }; 76 };
71 77
72 78 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
73 MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate() 79 HashMap<Frame*, String>* frameToContentID)
80 : m_frameToContentID(frameToContentID)
74 { 81 {
75 } 82 }
76 83
77 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute) 84 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute)
78 { 85 {
79 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src 86 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src
80 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML 87 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure to MHTML
81 // plays nicely with srcset. 88 // plays nicely with srcset.
82 return attribute.localName() == HTMLNames::srcsetAttr; 89 return attribute.localName() == HTMLNames::srcsetAttr;
83 } 90 }
84 91
92 String MHTMLPageSerializerDelegate::rewriteLink(const Element& element)
93 {
94 if (!element.isFrameOwnerElement())
95 return String();
96
97 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
98 Frame* frame = frameOwnerElement->contentFrame();
99 if (!frame)
100 return String();
101
102 KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID->get(frame));
103 ASSERT(cidURI.isValid());
104
105 if (isHTMLFrameElementBase(&element))
106 return cidURI.string();
107
108 if (isHTMLObjectElement(&element)) {
109 Document* doc = frameOwnerElement->contentDocument();
110 bool isHandledBySerializer = doc->isHTMLDocument()
111 || doc->isXHTMLDocument() || doc->isImageDocument();
112 if (isHandledBySerializer)
113 return cidURI.string();
114 }
115
116 return String();
117 }
118
85 } // namespace 119 } // namespace
86 120
121 static HashMap<Frame*, String> generateFrameContentIDs(Page* page)
122 {
123 HashMap<Frame*, String> frameToContentID;
124 int frameID = 0;
125 for (Frame* frame = page->mainFrame(); frame; frame = frame->tree().traverseNext()) {
126 // TODO(lukasza): Move cid generation to the browser + use base/guid.h
127 // (see the draft at crrev.com/1386873003).
128 StringBuilder contentIDBuilder;
129 contentIDBuilder.appendLiteral("<frame");
dcheng 2015/11/25 18:41:53 How about just leaving off the angle brackets here
Łukasz Anforowicz 2015/11/25 19:16:49 convertContentIDToURI strips the angle brackets, b
130 contentIDBuilder.appendNumber(frameID++);
131 contentIDBuilder.appendLiteral("@mhtml.blink>");
132
133 frameToContentID.add(frame, contentIDBuilder.toString());
134 }
135 return frameToContentID;
136 }
137
87 static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::EncodingPolicy encodingPolicy) 138 static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::EncodingPolicy encodingPolicy)
88 { 139 {
89 Vector<SerializedResource> resources; 140 Vector<SerializedResource> resources;
90 PageSerializer serializer(&resources, adoptPtr(new MHTMLPageSerializerDelegate)); 141 HashMap<Frame*, String> frameToContentID = generateFrameContentIDs(page);
142 MHTMLPageSerializerDelegate delegate(&frameToContentID);
143 PageSerializer serializer(&resources, &delegate);
91 144
92 RefPtr<SharedBuffer> output = SharedBuffer::create(); 145 RefPtr<SharedBuffer> output = SharedBuffer::create();
93 String boundary = MHTMLArchive::generateMHTMLBoundary(); 146 String boundary = MHTMLArchive::generateMHTMLBoundary();
94 147
95 Document* document = page->deprecatedLocalMainFrame()->document(); 148 Document* document = page->deprecatedLocalMainFrame()->document();
96 MHTMLArchive::generateMHTMLHeader( 149 MHTMLArchive::generateMHTMLHeader(
97 boundary, document->title(), document->suggestedMIMEType(), *output); 150 boundary, document->title(), document->suggestedMIMEType(), *output);
98 151
99 for (Frame* frame = page->deprecatedLocalMainFrame(); frame; frame = frame->tree().traverseNext()) { 152 for (Frame* frame = page->deprecatedLocalMainFrame(); frame; frame = frame->tree().traverseNext()) {
100 // TODO(lukasza): This causes incomplete MHTML for OOPIFs. 153 // TODO(lukasza): This causes incomplete MHTML for OOPIFs.
101 // (crbug.com/538766) 154 // (crbug.com/538766)
102 if (!frame->isLocalFrame()) 155 if (!frame->isLocalFrame())
103 continue; 156 continue;
104 157
105 resources.clear(); 158 resources.clear();
106 serializer.serializeFrame(*toLocalFrame(frame)); 159 serializer.serializeFrame(*toLocalFrame(frame));
107 160
108 for (const auto& resource : resources) { 161 bool isFirstResource = true;
162 for (const SerializedResource& resource : resources) {
163 // Frame is the 1st resource (see PageSerializer::serializeFrame doc
164 // comment). Frames need a Content-ID header.
165 String contentID = isFirstResource ? frameToContentID.get(frame) : String();
166
109 MHTMLArchive::generateMHTMLPart( 167 MHTMLArchive::generateMHTMLPart(
110 boundary, encodingPolicy, resource, *output); 168 boundary, contentID, encodingPolicy, resource, *output);
169
170 isFirstResource = false;
111 } 171 }
112 } 172 }
113 173
114 MHTMLArchive::generateMHTMLFooter(boundary, *output); 174 MHTMLArchive::generateMHTMLFooter(boundary, *output);
115 return output.release(); 175 return output.release();
116 } 176 }
117 177
118 WebCString WebPageSerializer::serializeToMHTML(WebView* view) 178 WebCString WebPageSerializer::serializeToMHTML(WebView* view)
119 { 179 {
120 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page(), MHTMLArchive::UseDefaultEncoding); 180 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page(), MHTMLArchive::UseDefaultEncoding);
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
159 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) 219 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
160 { 220 {
161 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|. 221 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|.
162 if (baseTarget.isEmpty()) 222 if (baseTarget.isEmpty())
163 return String("<base href=\".\">"); 223 return String("<base href=\".\">");
164 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">"; 224 String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">";
165 return baseString; 225 return baseString;
166 } 226 }
167 227
168 } // namespace blink 228 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/mhtml/MHTMLParser.cpp ('k') | third_party/WebKit/Source/web/tests/MHTMLTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698