Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(240)

Side by Side Diff: Source/core/page/PageSerializer.cpp

Issue 1177733002: Merge page serializers [11/12] (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Adjust TODO comment style Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Source/core/page/PageSerializer.h ('k') | Source/web/tests/PageSerializerTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 { 92 {
93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element); 93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element);
94 } 94 }
95 95
96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner) 96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner)
97 { 97 {
98 // FIXME: We should support all frame owners including applets. 98 // FIXME: We should support all frame owners including applets.
99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr; 99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr;
100 } 100 }
101 101
102 class SerializerMarkupAccumulator final : public MarkupAccumulator { 102 class SerializerMarkupAccumulator : public MarkupAccumulator {
103 STACK_ALLOCATED(); 103 STACK_ALLOCATED();
104 public: 104 public:
105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&); 105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&);
106 virtual ~SerializerMarkupAccumulator(); 106 virtual ~SerializerMarkupAccumulator();
107 107
108 protected: 108 protected:
109 virtual void appendText(StringBuilder& out, Text&) override; 109 virtual void appendText(StringBuilder& out, Text&) override;
110 virtual bool shouldIgnoreAttribute(const Attribute&) override; 110 virtual bool shouldIgnoreAttribute(const Attribute&) override;
111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) override; 111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) override;
112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) override; 112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) override;
113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; 113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override;
114 virtual void appendEndTag(const Element&) override; 114 virtual void appendEndTag(const Element&) override;
115 115
116 const Document& document();
117
116 private: 118 private:
117 PageSerializer* m_serializer; 119 PageSerializer* m_serializer;
118 RawPtrWillBeMember<const Document> m_document; 120 RawPtrWillBeMember<const Document> m_document;
119 121
120 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document 122 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document
121 // included into serialized text then extracts image, object, etc. The size 123 // included into serialized text then extracts image, object, etc. The size
122 // of this vector isn't small for large document. It is better to use 124 // of this vector isn't small for large document. It is better to use
123 // callback like functionality. 125 // callback like functionality.
124 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; 126 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes;
125 }; 127 };
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 MarkupAccumulator::appendStartTag(node, namespaces); 199 MarkupAccumulator::appendStartTag(node, namespaces);
198 m_nodes.append(&node); 200 m_nodes.append(&node);
199 } 201 }
200 202
201 void SerializerMarkupAccumulator::appendEndTag(const Element& element) 203 void SerializerMarkupAccumulator::appendEndTag(const Element& element)
202 { 204 {
203 if (!shouldIgnoreElement(element)) 205 if (!shouldIgnoreElement(element))
204 MarkupAccumulator::appendEndTag(element); 206 MarkupAccumulator::appendEndTag(element);
205 } 207 }
206 208
209 const Document& SerializerMarkupAccumulator::document()
210 {
211 return *m_document;
212 }
213
214 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS
215 // documents which leads to bugs like <https://crbug.com/251898>. Not being
216 // able to rewrite URLs inside CSS documents means that resources imported from
217 // url(...) statements in CSS might not work when rewriting links for the
218 // "Webpage, Complete" method of saving a page. It will take some work but it
219 // needs to be done if we want to continue to support non-MHTML saved pages.
220 //
221 // Once that is fixed it would make sense to make link rewriting a bit more
222 // general. A new method, String& rewriteURL(String&) or similar, could be added
223 // to PageSerializer.Delegate that would allow clients to control this. Some of
224 // the change link logic could be moved back to WebPageSerializer.
225 //
226 // The remaining code in LinkChangeSerializerMarkupAccumulator could probably
227 // be merged back into SerializerMarkupAccumulator with additional methods in
228 // PageSerializer.Delegate to control MOTW and Base tag rewrite.
229 class LinkChangeSerializerMarkupAccumulator final : public SerializerMarkupAccumulator {
230 STACK_ALLOCATED();
231 public:
232 LinkChangeSerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&, HashMap<String, String>& rewriteURLs, const String& rewriteFolder);
233
234 private:
235 void appendElement(StringBuilder&, Element&, Namespaces*) override;
236 void appendAttribute(StringBuilder&, const Element&, const Attribute&, Namespaces*) override;
237
238 // m_rewriteURLs include all pairs of local resource paths and corresponding original links.
239 HashMap<String, String> m_rewriteURLs;
240 String m_rewriteFolder;
241 };
242
243 LinkChangeSerializerMarkupAccumulator::LinkChangeSerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes, HashMap<String, String>& rewriteURLs, const String& rewriteFolder)
244 : SerializerMarkupAccumulator(serializer, document, nodes)
245 , m_rewriteURLs(rewriteURLs)
246 , m_rewriteFolder(rewriteFolder)
247 {
248 }
249
250 void LinkChangeSerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
251 {
252 if (element.hasTagName(HTMLNames::htmlTag)) {
253 // Add MOTW (Mark of the Web) declaration before html tag.
254 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
255 result.append('\n');
256 MarkupFormatter::appendComment(result, String::format(" saved from url=(%04d)%s ",
257 static_cast<int>(document().url().string().utf8().length()),
258 document().url().string().utf8().data()));
259 result.append('\n');
260 }
261
262 if (element.hasTagName(HTMLNames::baseTag)) {
263 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML
264 // Append a new base tag declaration.
265 result.appendLiteral("<base href=\".\"");
266 if (!document().baseTarget().isEmpty()) {
267 result.appendLiteral(" target=\"");
268 MarkupFormatter::appendAttributeValue(result, document().baseTarget(), document().isHTMLDocument());
269 result.append('"');
270 }
271 if (document().isXHTMLDocument())
272 result.appendLiteral(" />");
273 else
274 result.appendLiteral(">");
275 } else {
276 SerializerMarkupAccumulator::appendElement(result, element, namespaces);
277 }
278 }
279
280 void LinkChangeSerializerMarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces)
281 {
282 if (!m_rewriteURLs.isEmpty() && element.isURLAttribute(attribute)) {
283
284 String completeURL = document().completeURL(attribute.value());
285
286 if (m_rewriteURLs.contains(completeURL)) {
287 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this.
288 result.append(' ');
289 result.append(attribute.name().toString());
290 result.appendLiteral("=\"");
291 if (!m_rewriteFolder.isEmpty())
292 MarkupFormatter::appendAttributeValue(result, m_rewriteFolder + "/", document().isHTMLDocument());
293 MarkupFormatter::appendAttributeValue(result, m_rewriteURLs.get(completeURL), document().isHTMLDocument());
294 result.appendLiteral("\"");
295 return;
296 }
297 }
298 MarkupAccumulator::appendAttribute(result, element, attribute, namespaces);
299 }
300
301
207 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr<Delegate> delegate) 302 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr<Delegate> delegate)
208 : m_resources(resources) 303 : m_resources(resources)
209 , m_blankFrameCounter(0) 304 , m_blankFrameCounter(0)
210 , m_delegate(delegate) 305 , m_delegate(delegate)
211 { 306 {
212 } 307 }
213 308
214 void PageSerializer::serialize(Page* page) 309 void PageSerializer::serialize(Page* page)
215 { 310 {
216 serializeFrame(page->deprecatedLocalMainFrame()); 311 serializeFrame(page->deprecatedLocalMainFrame());
(...skipping 18 matching lines...) Expand all
235 } 330 }
236 331
237 // If frame is an image document, add the image and don't continue 332 // If frame is an image document, add the image and don't continue
238 if (document.isImageDocument()) { 333 if (document.isImageDocument()) {
239 ImageDocument& imageDocument = toImageDocument(document); 334 ImageDocument& imageDocument = toImageDocument(document);
240 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElement()->layoutObject(), url); 335 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElement()->layoutObject(), url);
241 return; 336 return;
242 } 337 }
243 338
244 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; 339 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
245 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); 340 String text;
246 String text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); 341 if (!m_rewriteURLs.isEmpty()) {
342 LinkChangeSerializerMarkupAccumulator accumulator(this, document, serializedNodes, m_rewriteURLs, m_rewriteFolder);
343 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode);
344 } else {
345 SerializerMarkupAccumulator accumulator(this, document, serializedNodes);
346 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode);
347 }
348
247 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::EntitiesForUnencodables); 349 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::EntitiesForUnencodables);
248 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length()))); 350 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
249 m_resourceURLs.add(url); 351 m_resourceURLs.add(url);
250 352
251 for (Node* node: serializedNodes) { 353 for (Node* node: serializedNodes) {
252 ASSERT(node); 354 ASSERT(node);
253 if (!node->isElementNode()) 355 if (!node->isElementNode())
254 continue; 356 continue;
255 357
256 Element& element = toElement(*node); 358 Element& element = toElement(*node);
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 } 543 }
442 544
443 addFontToResources(fontFaceSrcValue->fetch(&document)); 545 addFontToResources(fontFaceSrcValue->fetch(&document));
444 } else if (cssValue->isValueList()) { 546 } else if (cssValue->isValueList()) {
445 CSSValueList* cssValueList = toCSSValueList(cssValue); 547 CSSValueList* cssValueList = toCSSValueList(cssValue);
446 for (unsigned i = 0; i < cssValueList->length(); i++) 548 for (unsigned i = 0; i < cssValueList->length(); i++)
447 retrieveResourcesForCSSValue(cssValueList->item(i), document); 549 retrieveResourcesForCSSValue(cssValueList->item(i), document);
448 } 550 }
449 } 551 }
450 552
553 void PageSerializer::registerRewriteURL(const String& from, const String& to)
554 {
555 m_rewriteURLs.set(from, to);
556 }
557
558 void PageSerializer::setRewriteURLFolder(const String& rewriteFolder)
559 {
560 m_rewriteFolder = rewriteFolder;
561 }
562
451 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) 563 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame)
452 { 564 {
453 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); 565 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame);
454 if (iter != m_blankFrameURLs.end()) 566 if (iter != m_blankFrameURLs.end())
455 return iter->value; 567 return iter->value;
456 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); 568 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
457 KURL fakeURL(ParsedURLString, url); 569 KURL fakeURL(ParsedURLString, url);
458 m_blankFrameURLs.add(frame, fakeURL); 570 m_blankFrameURLs.add(frame, fakeURL);
459 571
460 return fakeURL; 572 return fakeURL;
461 } 573 }
462 574
463 PageSerializer::Delegate* PageSerializer::delegate() 575 PageSerializer::Delegate* PageSerializer::delegate()
464 { 576 {
465 return m_delegate.get(); 577 return m_delegate.get();
466 } 578 }
467 579
468 } // namespace blink 580 } // namespace blink
OLDNEW
« no previous file with comments | « Source/core/page/PageSerializer.h ('k') | Source/web/tests/PageSerializerTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698