Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(609)

Side by Side Diff: Source/core/page/PageSerializer.cpp

Issue 1177733002: Merge page serializers [11/12] (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Add TODO for future coding Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Source/core/page/PageSerializer.h ('k') | Source/web/tests/PageSerializerTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 { 92 {
93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isC harsetSpecifyingNode(element); 93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isC harsetSpecifyingNode(element);
94 } 94 }
95 95
96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerEleme nt& frameOwner) 96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerEleme nt& frameOwner)
97 { 97 {
98 // FIXME: We should support all frame owners including applets. 98 // FIXME: We should support all frame owners including applets.
99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::sr cAttr; 99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::sr cAttr;
100 } 100 }
101 101
102 class SerializerMarkupAccumulator final : public MarkupAccumulator { 102 class SerializerMarkupAccumulator : public MarkupAccumulator {
103 STACK_ALLOCATED(); 103 STACK_ALLOCATED();
104 public: 104 public:
105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect or<RawPtrWillBeMember<Node>>&); 105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect or<RawPtrWillBeMember<Node>>&);
106 virtual ~SerializerMarkupAccumulator(); 106 virtual ~SerializerMarkupAccumulator();
107 107
108 protected: 108 protected:
109 virtual void appendText(StringBuilder& out, Text&) override; 109 virtual void appendText(StringBuilder& out, Text&) override;
110 virtual bool shouldIgnoreAttribute(const Attribute&) override; 110 virtual bool shouldIgnoreAttribute(const Attribute&) override;
111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) overri de; 111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) overri de;
112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Name spaces*) override; 112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Name spaces*) override;
113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; 113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override;
114 virtual void appendEndTag(const Element&) override; 114 virtual void appendEndTag(const Element&) override;
115 115
116 const Document& document();
117
116 private: 118 private:
117 PageSerializer* m_serializer; 119 PageSerializer* m_serializer;
118 RawPtrWillBeMember<const Document> m_document; 120 RawPtrWillBeMember<const Document> m_document;
119 121
120 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document 122 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document
121 // included into serialized text then extracts image, object, etc. The size 123 // included into serialized text then extracts image, object, etc. The size
122 // of this vector isn't small for large document. It is better to use 124 // of this vector isn't small for large document. It is better to use
123 // callback like functionality. 125 // callback like functionality.
124 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; 126 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes;
125 }; 127 };
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 MarkupAccumulator::appendStartTag(node, namespaces); 199 MarkupAccumulator::appendStartTag(node, namespaces);
198 m_nodes.append(&node); 200 m_nodes.append(&node);
199 } 201 }
200 202
201 void SerializerMarkupAccumulator::appendEndTag(const Element& element) 203 void SerializerMarkupAccumulator::appendEndTag(const Element& element)
202 { 204 {
203 if (!shouldIgnoreElement(element)) 205 if (!shouldIgnoreElement(element))
204 MarkupAccumulator::appendEndTag(element); 206 MarkupAccumulator::appendEndTag(element);
205 } 207 }
206 208
209 const Document& SerializerMarkupAccumulator::document()
210 {
211 return *m_document;
212 }
213
214 /* TODO(tiger): Right now there is no support for rewriting URLs inside CSS
philipj_slow 2015/07/01 12:26:12 http://google-styleguide.googlecode.com/svn/trunk/
215 * documents, which leads to bugs like <https://crbug.com/251898>. Not being
216 * able to rewrite URLs inside CSS documents means that resources imported from
217 * url(...) statements in CSS might not work when rewriting links for the
218 * "Webpage, Complete" method of saving a page. It will take some work, but it
219 * needs to be done if we want to continue to support non-MHTML saved pages.
220 *
221 * Once that is fixed, it would make sense to make link rewriting a bit more
222 * general. A new method, String& rewriteURL(String&) or similar, could be added
223 * to PageSerializer::Delegate that would allow clients to control this. Some of
224 * the link-changing logic could then be moved back to WebPageSerializer.
225 *
226 * The remaining code in LinkChangeSerializerMarkupAccumulator could probably
227 * be merged back into SerializerMarkupAccumulator with additional methods in
228 * PageSerializer::Delegate to control the MOTW (Mark of the Web) comment and <base> tag rewriting.
229 */
230 class LinkChangeSerializerMarkupAccumulator final : public SerializerMarkupAccum ulator {
231 STACK_ALLOCATED();
232 public:
233 LinkChangeSerializerMarkupAccumulator(PageSerializer*, const Document&, Will BeHeapVector<RawPtrWillBeMember<Node>>&, HashMap<String, String>& rewriteURLs, c onst String& rewriteFolder);
234
235 private:
236 void appendElement(StringBuilder&, Element&, Namespaces*) override;
237 void appendAttribute(StringBuilder&, const Element&, const Attribute&, Names paces*) override;
238
239 // m_rewriteURLs maps each original link (as a complete URL) to its corresponding local resource path.
240 HashMap<String, String> m_rewriteURLs;
241 String m_rewriteFolder;
242 };
243
244 LinkChangeSerializerMarkupAccumulator::LinkChangeSerializerMarkupAccumulator(Pag eSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBe Member<Node>>& nodes, HashMap<String, String>& rewriteURLs, const String& rewrit eFolder)
245 : SerializerMarkupAccumulator(serializer, document, nodes)
246 , m_rewriteURLs(rewriteURLs)
247 , m_rewriteFolder(rewriteFolder)
248 {
249 }
250
251 void LinkChangeSerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
252 {
253 if (element.hasTagName(HTMLNames::htmlTag)) {
254 // Add MOTW (Mark of the Web) declaration before html tag.
255 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
256 result.append('\n');
257 MarkupFormatter::appendComment(result, String::format(" saved from url=( %04d)%s ",
258 static_cast<int>(document().url().string().utf8().length()),
259 document().url().string().utf8().data()));
260 result.append('\n');
261 }
262
263 if (element.hasTagName(HTMLNames::baseTag)) {
264 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML
265 // Append a new base tag declaration.
266 result.appendLiteral("<base href=\".\"");
267 if (!document().baseTarget().isEmpty()) {
268 result.appendLiteral(" target=\"");
269 MarkupFormatter::appendAttributeValue(result, document().baseTarget( ), document().isHTMLDocument());
270 result.append('"');
271 }
272 if (document().isXHTMLDocument())
273 result.appendLiteral(" />");
274 else
275 result.appendLiteral(">");
276 } else {
277 SerializerMarkupAccumulator::appendElement(result, element, namespaces);
278 }
279 }
280
281 void LinkChangeSerializerMarkupAccumulator::appendAttribute(StringBuilder& resul t, const Element& element, const Attribute& attribute, Namespaces* namespaces)
282 {
283 if (!m_rewriteURLs.isEmpty() && element.isURLAttribute(attribute)) {
284
285 String completeURL = document().completeURL(attribute.value());
286
287 if (m_rewriteURLs.contains(completeURL)) {
288 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this.
289 result.append(' ');
290 result.append(attribute.name().toString());
291 result.appendLiteral("=\"");
292 if (!m_rewriteFolder.isEmpty())
293 MarkupFormatter::appendAttributeValue(result, m_rewriteFolder + "/", document().isHTMLDocument());
294 MarkupFormatter::appendAttributeValue(result, m_rewriteURLs.get(comp leteURL), document().isHTMLDocument());
295 result.appendLiteral("\"");
296 return;
297 }
298 }
299 MarkupAccumulator::appendAttribute(result, element, attribute, namespaces);
300 }
301
302
207 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr <Delegate> delegate) 303 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr <Delegate> delegate)
208 : m_resources(resources) 304 : m_resources(resources)
209 , m_blankFrameCounter(0) 305 , m_blankFrameCounter(0)
210 , m_delegate(delegate) 306 , m_delegate(delegate)
211 { 307 {
212 } 308 }
213 309
214 void PageSerializer::serialize(Page* page) 310 void PageSerializer::serialize(Page* page)
215 { 311 {
216 serializeFrame(page->deprecatedLocalMainFrame()); 312 serializeFrame(page->deprecatedLocalMainFrame());
(...skipping 18 matching lines...) Expand all
235 } 331 }
236 332
237 // If frame is an image document, add the image and don't continue 333 // If frame is an image document, add the image and don't continue
238 if (document.isImageDocument()) { 334 if (document.isImageDocument()) {
239 ImageDocument& imageDocument = toImageDocument(document); 335 ImageDocument& imageDocument = toImageDocument(document);
240 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElem ent()->layoutObject(), url); 336 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElem ent()->layoutObject(), url);
241 return; 337 return;
242 } 338 }
243 339
244 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; 340 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
245 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); 341 String text;
246 String text = serializeNodes<EditingStrategy>(accumulator, document, Include Node); 342 if (!m_rewriteURLs.isEmpty()) {
343 LinkChangeSerializerMarkupAccumulator accumulator(this, document, serial izedNodes, m_rewriteURLs, m_rewriteFolder);
344 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNod e);
345 } else {
346 SerializerMarkupAccumulator accumulator(this, document, serializedNodes) ;
347 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNod e);
348 }
349
247 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::Entiti esForUnencodables); 350 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::Entiti esForUnencodables);
248 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh aredBuffer::create(frameHTML.data(), frameHTML.length()))); 351 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh aredBuffer::create(frameHTML.data(), frameHTML.length())));
249 m_resourceURLs.add(url); 352 m_resourceURLs.add(url);
250 353
251 for (Node* node: serializedNodes) { 354 for (Node* node: serializedNodes) {
252 ASSERT(node); 355 ASSERT(node);
253 if (!node->isElementNode()) 356 if (!node->isElementNode())
254 continue; 357 continue;
255 358
256 Element& element = toElement(*node); 359 Element& element = toElement(*node);
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 } 544 }
442 545
443 addFontToResources(fontFaceSrcValue->fetch(&document)); 546 addFontToResources(fontFaceSrcValue->fetch(&document));
444 } else if (cssValue->isValueList()) { 547 } else if (cssValue->isValueList()) {
445 CSSValueList* cssValueList = toCSSValueList(cssValue); 548 CSSValueList* cssValueList = toCSSValueList(cssValue);
446 for (unsigned i = 0; i < cssValueList->length(); i++) 549 for (unsigned i = 0; i < cssValueList->length(); i++)
447 retrieveResourcesForCSSValue(cssValueList->item(i), document); 550 retrieveResourcesForCSSValue(cssValueList->item(i), document);
448 } 551 }
449 } 552 }
450 553
554 void PageSerializer::registerRewriteURL(const String& from, const String& to)
555 {
556 m_rewriteURLs.set(from, to);
557 }
558
559 void PageSerializer::setRewriteURLFolder(const String& rewriteFolder)
560 {
561 m_rewriteFolder = rewriteFolder;
562 }
563
451 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) 564 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame)
452 { 565 {
453 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); 566 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame);
454 if (iter != m_blankFrameURLs.end()) 567 if (iter != m_blankFrameURLs.end())
455 return iter->value; 568 return iter->value;
456 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); 569 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
457 KURL fakeURL(ParsedURLString, url); 570 KURL fakeURL(ParsedURLString, url);
458 m_blankFrameURLs.add(frame, fakeURL); 571 m_blankFrameURLs.add(frame, fakeURL);
459 572
460 return fakeURL; 573 return fakeURL;
461 } 574 }
462 575
463 PageSerializer::Delegate* PageSerializer::delegate() 576 PageSerializer::Delegate* PageSerializer::delegate()
464 { 577 {
465 return m_delegate.get(); 578 return m_delegate.get();
466 } 579 }
467 580
468 } // namespace blink 581 } // namespace blink
OLDNEW
« no previous file with comments | « Source/core/page/PageSerializer.h ('k') | Source/web/tests/PageSerializerTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698