Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(184)

Side by Side Diff: Source/core/page/PageSerializer.cpp

Issue 1177733002: Merge page serializers [11/12] (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Rebase + issues Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
80 HTMLAttributeList attributeList; 80 HTMLAttributeList attributeList;
81 AttributeCollection attributes = element.attributes(); 81 AttributeCollection attributes = element.attributes();
82 for (const Attribute& attr: attributes) { 82 for (const Attribute& attr: attributes) {
83 // FIXME: We should deal appropriately with the attribute if they have a namespace. 83 // FIXME: We should deal appropriately with the attribute if they have a namespace.
84 attributeList.append(std::make_pair(attr.name().localName(), attr.value( ).string())); 84 attributeList.append(std::make_pair(attr.name().localName(), attr.value( ).string()));
85 } 85 }
86 WTF::TextEncoding textEncoding = encodingFromMetaAttributes(attributeList); 86 WTF::TextEncoding textEncoding = encodingFromMetaAttributes(attributeList);
87 return textEncoding.isValid(); 87 return textEncoding.isValid();
88 } 88 }
89 89
90 static bool shouldIgnoreElement(const Element& element)
91 {
92 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element);
93 }
94
95 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerEleme nt& frameOwner) 90 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerEleme nt& frameOwner)
96 { 91 {
97 // FIXME: We should support all frame owners including applets. 92 // FIXME: We should support all frame owners including applets.
98 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::sr cAttr; 93 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::sr cAttr;
99 } 94 }
100 95
101 class SerializerMarkupAccumulator final : public MarkupAccumulator { 96 class SerializerMarkupAccumulator : public MarkupAccumulator {
102 STACK_ALLOCATED(); 97 STACK_ALLOCATED();
103 public: 98 public:
104 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect or<RawPtrWillBeMember<Node>>&); 99 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect or<RawPtrWillBeMember<Node>>&);
105 virtual ~SerializerMarkupAccumulator(); 100 virtual ~SerializerMarkupAccumulator();
106 101
107 protected: 102 protected:
108 virtual void appendText(StringBuilder& out, Text&) override; 103 virtual void appendText(StringBuilder& out, Text&) override;
109 virtual bool shouldIgnoreAttribute(const Attribute&) override; 104 virtual bool shouldIgnoreAttribute(const Attribute&) override;
110 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) overri de; 105 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) overri de;
111 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Name spaces*) override; 106 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Name spaces*) override;
112 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; 107 virtual void appendStartTag(Node&, Namespaces* = nullptr) override;
113 virtual void appendEndTag(const Element&) override; 108 virtual void appendEndTag(const Element&) override;
114 109
110 virtual bool shouldIgnoreElement(const Element&) const;
111
112 PageSerializer* pageSerializer();
philipj_slow 2015/06/26 09:07:55 I can't see this actually used in the patch... me
Tiger (Sony Mobile) 2015/06/30 11:31:52 And now, neither can I. Removing.
113 const Document& document();
114
115 private: 115 private:
116 PageSerializer* m_serializer; 116 PageSerializer* m_serializer;
117 RawPtrWillBeMember<const Document> m_document; 117 RawPtrWillBeMember<const Document> m_document;
118 118
119 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document 119 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document
120 // included into serialized text then extracts image, object, etc. The size 120 // included into serialized text then extracts image, object, etc. The size
121 // of this vector isn't small for large document. It is better to use 121 // of this vector isn't small for large document. It is better to use
122 // callback like functionality. 122 // callback like functionality.
123 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; 123 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes;
124 }; 124 };
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
196 MarkupAccumulator::appendStartTag(node, namespaces); 196 MarkupAccumulator::appendStartTag(node, namespaces);
197 m_nodes.append(&node); 197 m_nodes.append(&node);
198 } 198 }
199 199
200 void SerializerMarkupAccumulator::appendEndTag(const Element& element) 200 void SerializerMarkupAccumulator::appendEndTag(const Element& element)
201 { 201 {
202 if (!shouldIgnoreElement(element)) 202 if (!shouldIgnoreElement(element))
203 MarkupAccumulator::appendEndTag(element); 203 MarkupAccumulator::appendEndTag(element);
204 } 204 }
205 205
206 bool SerializerMarkupAccumulator::shouldIgnoreElement(const Element& element) const
207 {
208 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element);
209 }
210
211 PageSerializer* SerializerMarkupAccumulator::pageSerializer()
212 {
213 return m_serializer;
214 }
215
216 const Document& SerializerMarkupAccumulator::document()
217 {
218 return *m_document;
219 }
220
221 class LinkChangeSerializerMarkupAccumulator final : public SerializerMarkupAccumulator {
philipj_slow 2015/06/26 09:07:55 Should this be STACK_ALLOCATED()?
Tiger (Sony Mobile) 2015/06/30 11:31:53 Probably, yes. Adding.
222 public:
223 LinkChangeSerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&, HashMap<String, String>& rewriteURLs, const String& rewriteFolder);
224
225 private:
226 void appendElement(StringBuilder&, Element&, Namespaces*) override;
227 void appendAttribute(StringBuilder&, const Element&, const Attribute&, Namespaces*) override;
228
229 bool shouldIgnoreElement(const Element&) const override;
230
231 // m_rewriteURLs include all pair of local resource path and corresponding original link.
philipj_slow 2015/06/26 09:07:56 Plural pairs, paths and links.
Tiger (Sony Mobile) 2015/06/30 11:31:52 Done.
232 HashMap<String, String> m_rewriteURLs;
233 String m_rewriteFolder;
234 };
235
236 LinkChangeSerializerMarkupAccumulator::LinkChangeSerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes, HashMap<String, String>& rewriteURLs, const String& rewriteFolder)
philipj_slow 2015/06/26 09:07:55 Double space on this line.
Tiger (Sony Mobile) 2015/06/30 11:31:53 Done.
237 : SerializerMarkupAccumulator(serializer, document, nodes)
238 , m_rewriteURLs(rewriteURLs)
239 , m_rewriteFolder(rewriteFolder)
240 {
241 }
242
243 void LinkChangeSerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
244 {
245 if (element.hasTagName(HTMLNames::htmlTag)) {
246 // Add MOTW (Mark of the Web) declaration before html tag.
247 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
philipj_slow 2015/06/26 09:07:56 This format is wonderfully strange :)
Tiger (Sony Mobile) 2015/06/30 11:31:52 It is :)
248 result.append('\n');
249 MarkupFormatter::appendComment(result, String::format(" saved from url=(%04d)%s ",
250 static_cast<int>(document().url().string().utf8().length()),
philipj_slow 2015/06/26 09:07:56 Can you use %u or whatever the type is to avoid th
Tiger (Sony Mobile) 2015/06/30 11:31:53 Is there a format like PRIuS from format_macros.h
philipj_slow 2015/06/30 11:56:27 I guess the assumption is that the URL will be 7-b
Tiger (Sony Mobile) 2015/06/30 12:59:02 That sounds more reasonable yes, and it seems that
251 document().url().string().utf8().data()));
252 result.append('\n');
253 }
254
255 SerializerMarkupAccumulator::appendElement(result, element, namespaces);
256
257 if (element.hasTagName(HTMLNames::baseTag)) {
258 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML
259 // Append a new base tag declaration.
philipj_slow 2015/06/26 09:07:56 I'm not sure I understand this. Is the main thing
Tiger (Sony Mobile) 2015/06/30 11:31:53 The main thing is to change the URL. This could be
philipj_slow 2015/06/30 11:56:27 The original code for this is WebPageSerializerImp
Tiger (Sony Mobile) 2015/06/30 12:59:02 I think it would make sense, and it would work for
Tiger (Sony Mobile) 2015/07/01 12:14:46 I've added a small TODO section about this.
260 result.appendLiteral("<base href=\".\"");
261 if (!document().baseTarget().isEmpty()) {
262 result.appendLiteral(" target=\"");
263 MarkupFormatter::appendAttributeValue(result, document().baseTarget(), document().isHTMLDocument());
264 result.append('"');
265 }
266 if (document().isXHTMLDocument())
267 result.appendLiteral(" />");
268 else
269 result.appendLiteral(">");
270 }
271 }
272
273 void LinkChangeSerializerMarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces)
274 {
275 if (!m_rewriteURLs.isEmpty() && element.isURLAttribute(attribute) && !element.isJavaScriptURLAttribute(attribute)) {
philipj_slow 2015/06/26 09:07:56 Does the javascript: case matter here? There's no
Tiger (Sony Mobile) 2015/06/30 11:31:52 It doesn't really matter no, it was just added to
philipj_slow 2015/06/30 11:56:27 Forgot to remove it, or do you mean to do it later
Tiger (Sony Mobile) 2015/06/30 12:59:02 Removing now.
276
277 String completeURL = document().completeURL(attribute.value());
278
279 if (m_rewriteURLs.contains(completeURL)) {
280 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this.
philipj_slow 2015/06/26 09:07:55 Do you mean to refactor this so that one can rewri
Tiger (Sony Mobile) 2015/06/30 11:31:53 Exactly so.
281 result.append(' ');
282 result.append(attribute.name().toString());
283 result.appendLiteral("=\"");
284 if (!m_rewriteFolder.isEmpty()) {
philipj_slow 2015/06/26 09:07:56 Don't need the {} here.
Tiger (Sony Mobile) 2015/06/30 11:31:52 Done.
285 MarkupFormatter::appendAttributeValue(result, m_rewriteFolder + "/", document().isHTMLDocument());
286 }
287 MarkupFormatter::appendAttributeValue(result, m_rewriteURLs.get(completeURL), document().isHTMLDocument());
288 result.appendLiteral("\"");
289 return;
290 }
291 }
292 MarkupAccumulator::appendAttribute(result, element, attribute, namespaces);
293 }
294
295 bool LinkChangeSerializerMarkupAccumulator::shouldIgnoreElement(const Element& element) const
296 {
297 return SerializerMarkupAccumulator::shouldIgnoreElement(element) || isHTMLBaseElement(element);
philipj_slow 2015/06/26 09:07:56 Instead of overriding this, can't LinkChangeSerial
Tiger (Sony Mobile) 2015/06/30 11:31:52 True! Fixing.
philipj_slow 2015/06/30 11:56:27 With that, I think you can also undo the other cha
Tiger (Sony Mobile) 2015/06/30 12:59:02 Done.
298 }
299
300
206 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr <Delegate> delegate) 301 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr <Delegate> delegate)
207 : m_resources(resources) 302 : m_resources(resources)
208 , m_blankFrameCounter(0) 303 , m_blankFrameCounter(0)
209 , m_delegate(delegate) 304 , m_delegate(delegate)
210 { 305 {
211 } 306 }
212 307
213 void PageSerializer::serialize(Page* page) 308 void PageSerializer::serialize(Page* page)
214 { 309 {
215 serializeFrame(page->deprecatedLocalMainFrame()); 310 serializeFrame(page->deprecatedLocalMainFrame());
(...skipping 18 matching lines...) Expand all
234 } 329 }
235 330
236 // If frame is an image document, add the image and don't continue 331 // If frame is an image document, add the image and don't continue
237 if (document.isImageDocument()) { 332 if (document.isImageDocument()) {
238 ImageDocument& imageDocument = toImageDocument(document); 333 ImageDocument& imageDocument = toImageDocument(document);
239 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElem ent()->layoutObject(), url); 334 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElem ent()->layoutObject(), url);
240 return; 335 return;
241 } 336 }
242 337
243 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; 338 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
244 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); 339 String text;
245 String text = serializeNodes<EditingStrategy>(accumulator, document, Include Node); 340 if (!m_rewriteURLs.isEmpty()) {
246 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::Entiti esForUnencodables); 341 LinkChangeSerializerMarkupAccumulator accumulator(this, document, serial izedNodes, m_rewriteURLs, m_rewriteFolder);
342 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNod e);
343 } else {
344 SerializerMarkupAccumulator accumulator(this, document, serializedNodes) ;
345 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNod e);
346 }
347
348 WTF::TextEncoding textEncoding(document.charset());
philipj_slow 2015/06/26 09:07:55 document.encoding() is already a WTF::TextEncoding
Tiger (Sony Mobile) 2015/06/30 11:31:52 Done.
349 CString frameHTML = textEncoding.normalizeAndEncode(text, WTF::EntitiesForUnencodables);
247 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh aredBuffer::create(frameHTML.data(), frameHTML.length()))); 350 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh aredBuffer::create(frameHTML.data(), frameHTML.length())));
248 m_resourceURLs.add(url); 351 m_resourceURLs.add(url);
249 352
250 for (Node* node: serializedNodes) { 353 for (Node* node: serializedNodes) {
251 ASSERT(node); 354 ASSERT(node);
252 if (!node->isElementNode()) 355 if (!node->isElementNode())
253 continue; 356 continue;
254 357
255 Element& element = toElement(*node); 358 Element& element = toElement(*node);
256 // We have to process in-line style as it might contain some resources ( typically background images). 359 // We have to process in-line style as it might contain some resources ( typically background images).
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
405 } 508 }
406 509
407 addFontToResources(fontFaceSrcValue->fetch(&document)); 510 addFontToResources(fontFaceSrcValue->fetch(&document));
408 } else if (cssValue->isValueList()) { 511 } else if (cssValue->isValueList()) {
409 CSSValueList* cssValueList = toCSSValueList(cssValue); 512 CSSValueList* cssValueList = toCSSValueList(cssValue);
410 for (unsigned i = 0; i < cssValueList->length(); i++) 513 for (unsigned i = 0; i < cssValueList->length(); i++)
411 retrieveResourcesForCSSValue(cssValueList->item(i), document); 514 retrieveResourcesForCSSValue(cssValueList->item(i), document);
412 } 515 }
413 } 516 }
414 517
518 void PageSerializer::registerRewriteURL(const String& from, const String& to)
519 {
520 m_rewriteURLs.set(from, to);
521 }
522
523 void PageSerializer::setRewriteURLFolder(const String& rewriteFolder)
524 {
525 m_rewriteFolder = rewriteFolder;
526 }
527
415 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) 528 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame)
416 { 529 {
417 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); 530 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame);
418 if (iter != m_blankFrameURLs.end()) 531 if (iter != m_blankFrameURLs.end())
419 return iter->value; 532 return iter->value;
420 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); 533 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
421 KURL fakeURL(ParsedURLString, url); 534 KURL fakeURL(ParsedURLString, url);
422 m_blankFrameURLs.add(frame, fakeURL); 535 m_blankFrameURLs.add(frame, fakeURL);
423 536
424 return fakeURL; 537 return fakeURL;
425 } 538 }
426 539
427 PageSerializer::Delegate* PageSerializer::delegate() 540 PageSerializer::Delegate* PageSerializer::delegate()
428 { 541 {
429 return m_delegate.get(); 542 return m_delegate.get();
430 } 543 }
431 544
432 } // namespace blink 545 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698