OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
92 { | 92 { |
93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element); | 93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element); |
94 } | 94 } |
95 | 95 |
96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner) | 96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner) |
97 { | 97 { |
98 // FIXME: We should support all frame owners including applets. | 98 // FIXME: We should support all frame owners including applets. |
99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr; | 99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr; |
100 } | 100 } |
101 | 101 |
102 class SerializerMarkupAccumulator final : public MarkupAccumulator { | 102 class SerializerMarkupAccumulator : public MarkupAccumulator { |
103 STACK_ALLOCATED(); | 103 STACK_ALLOCATED(); |
104 public: | 104 public: |
105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&); | 105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&); |
106 virtual ~SerializerMarkupAccumulator(); | 106 virtual ~SerializerMarkupAccumulator(); |
107 | 107 |
108 protected: | 108 protected: |
109 virtual void appendText(StringBuilder& out, Text&) override; | 109 virtual void appendText(StringBuilder& out, Text&) override; |
110 virtual bool shouldIgnoreAttribute(const Attribute&) override; | 110 virtual bool shouldIgnoreAttribute(const Attribute&) override; |
111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) override; | 111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) override; |
112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) override; | 112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) override; |
113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; | 113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; |
114 virtual void appendEndTag(const Element&) override; | 114 virtual void appendEndTag(const Element&) override; |
115 | 115 |
116 const Document& document(); | |
117 | |
116 private: | 118 private: |
117 PageSerializer* m_serializer; | 119 PageSerializer* m_serializer; |
118 RawPtrWillBeMember<const Document> m_document; | 120 RawPtrWillBeMember<const Document> m_document; |
119 | 121 |
120 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document | 122 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document |
121 // included into serialized text then extracts image, object, etc. The size | 123 // included into serialized text then extracts image, object, etc. The size |
122 // of this vector isn't small for large document. It is better to use | 124 // of this vector isn't small for large document. It is better to use |
123 // callback like functionality. | 125 // callback like functionality. |
124 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; | 126 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; |
125 }; | 127 }; |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
197 MarkupAccumulator::appendStartTag(node, namespaces); | 199 MarkupAccumulator::appendStartTag(node, namespaces); |
198 m_nodes.append(&node); | 200 m_nodes.append(&node); |
199 } | 201 } |
200 | 202 |
201 void SerializerMarkupAccumulator::appendEndTag(const Element& element) | 203 void SerializerMarkupAccumulator::appendEndTag(const Element& element) |
202 { | 204 { |
203 if (!shouldIgnoreElement(element)) | 205 if (!shouldIgnoreElement(element)) |
204 MarkupAccumulator::appendEndTag(element); | 206 MarkupAccumulator::appendEndTag(element); |
205 } | 207 } |
206 | 208 |
209 const Document& SerializerMarkupAccumulator::document() | |
210 { | |
211 return *m_document; | |
212 } | |
213 | |
214 /* TODO(tiger): Right now there is no support for rewriting URLs inside CSS | |
philipj_slow
2015/07/01 12:26:12
http://google-styleguide.googlecode.com/svn/trunk/
| |
215 * documents which leads to bugs like <https://crbug.com/251898>. Not being | |
216 * able to rewrite URLs inside CSS documents means that resources imported from | |
217 * url(...) statements in CSS might not work when rewriting links for the | |
218 * "Webpage, Complete" method of saving a page. It will take some work but it | |
219 * needs to be done if we want to continue to support non-MHTML saved pages. | |
220 * | |
221 * Once that is fixed it would make sense to make link rewriting a bit more | |
222 * general. A new method, String& rewriteURL(String&) or similar, could be added | |
223 * to PageSerializer.Delegate that would allow clients to control this. Some of | |
224 * the change link logic could be moved back to WebPageSerializer. | |
225 * | |
226 * The remaining code in LinkChangeSerializerMarkupAccumulator could probably | |
227 * be merged back into SerializerMarkupAccumulator with additional methods in | |
228 * PageSerializer.Delegate to control MOTW and Base tag rewrite. | |
229 */ | |
230 class LinkChangeSerializerMarkupAccumulator final : public SerializerMarkupAccumulator { | |
231 STACK_ALLOCATED(); | |
232 public: | |
233 LinkChangeSerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&, HashMap<String, String>& rewriteURLs, const String& rewriteFolder); | |
234 | |
235 private: | |
236 void appendElement(StringBuilder&, Element&, Namespaces*) override; | |
237 void appendAttribute(StringBuilder&, const Element&, const Attribute&, Namespaces*) override; | |
238 | |
239 // m_rewriteURLs include all pairs of local resource paths and corresponding original links. | |
240 HashMap<String, String> m_rewriteURLs; | |
241 String m_rewriteFolder; | |
242 }; | |
243 | |
244 LinkChangeSerializerMarkupAccumulator::LinkChangeSerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes, HashMap<String, String>& rewriteURLs, const String& rewriteFolder) | |
245 : SerializerMarkupAccumulator(serializer, document, nodes) | |
246 , m_rewriteURLs(rewriteURLs) | |
247 , m_rewriteFolder(rewriteFolder) | |
248 { | |
249 } | |
250 | |
251 void LinkChangeSerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces) | |
252 { | |
253 if (element.hasTagName(HTMLNames::htmlTag)) { | |
254 // Add MOTW (Mark of the Web) declaration before html tag. | |
255 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. | |
256 result.append('\n'); | |
257 MarkupFormatter::appendComment(result, String::format(" saved from url=(%04d)%s ", | |
258 static_cast<int>(document().url().string().utf8().length()), | |
259 document().url().string().utf8().data())); | |
260 result.append('\n'); | |
261 } | |
262 | |
263 if (element.hasTagName(HTMLNames::baseTag)) { | |
264 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML | |
265 // Append a new base tag declaration. | |
266 result.appendLiteral("<base href=\".\""); | |
267 if (!document().baseTarget().isEmpty()) { | |
268 result.appendLiteral(" target=\""); | |
269 MarkupFormatter::appendAttributeValue(result, document().baseTarget(), document().isHTMLDocument()); | |
270 result.append('"'); | |
271 } | |
272 if (document().isXHTMLDocument()) | |
273 result.appendLiteral(" />"); | |
274 else | |
275 result.appendLiteral(">"); | |
276 } else { | |
277 SerializerMarkupAccumulator::appendElement(result, element, namespaces); | |
278 } | |
279 } | |
280 | |
281 void LinkChangeSerializerMarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces) | |
282 { | |
283 if (!m_rewriteURLs.isEmpty() && element.isURLAttribute(attribute)) { | |
284 | |
285 String completeURL = document().completeURL(attribute.value()); | |
286 | |
287 if (m_rewriteURLs.contains(completeURL)) { | |
288 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this. | |
289 result.append(' '); | |
290 result.append(attribute.name().toString()); | |
291 result.appendLiteral("=\""); | |
292 if (!m_rewriteFolder.isEmpty()) | |
293 MarkupFormatter::appendAttributeValue(result, m_rewriteFolder + "/", document().isHTMLDocument()); | |
294 MarkupFormatter::appendAttributeValue(result, m_rewriteURLs.get(completeURL), document().isHTMLDocument()); | |
295 result.appendLiteral("\""); | |
296 return; | |
297 } | |
298 } | |
299 MarkupAccumulator::appendAttribute(result, element, attribute, namespaces); | |
300 } | |
301 | |
302 | |
207 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr<Delegate> delegate) | 303 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr<Delegate> delegate) |
208 : m_resources(resources) | 304 : m_resources(resources) |
209 , m_blankFrameCounter(0) | 305 , m_blankFrameCounter(0) |
210 , m_delegate(delegate) | 306 , m_delegate(delegate) |
211 { | 307 { |
212 } | 308 } |
213 | 309 |
214 void PageSerializer::serialize(Page* page) | 310 void PageSerializer::serialize(Page* page) |
215 { | 311 { |
216 serializeFrame(page->deprecatedLocalMainFrame()); | 312 serializeFrame(page->deprecatedLocalMainFrame()); |
(...skipping 18 matching lines...) Expand all Loading... | |
235 } | 331 } |
236 | 332 |
237 // If frame is an image document, add the image and don't continue | 333 // If frame is an image document, add the image and don't continue |
238 if (document.isImageDocument()) { | 334 if (document.isImageDocument()) { |
239 ImageDocument& imageDocument = toImageDocument(document); | 335 ImageDocument& imageDocument = toImageDocument(document); |
240 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElement()->layoutObject(), url); | 336 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElement()->layoutObject(), url); |
241 return; | 337 return; |
242 } | 338 } |
243 | 339 |
244 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; | 340 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; |
245 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); | 341 String text; |
246 String text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); | 342 if (!m_rewriteURLs.isEmpty()) { |
343 LinkChangeSerializerMarkupAccumulator accumulator(this, document, serializedNodes, m_rewriteURLs, m_rewriteFolder); | |
344 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); | |
345 } else { | |
346 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); | |
347 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); | |
348 } | |
349 | |
247 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::EntitiesForUnencodables); | 350 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::EntitiesForUnencodables); |
248 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length()))); | 351 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length()))); |
249 m_resourceURLs.add(url); | 352 m_resourceURLs.add(url); |
250 | 353 |
251 for (Node* node: serializedNodes) { | 354 for (Node* node: serializedNodes) { |
252 ASSERT(node); | 355 ASSERT(node); |
253 if (!node->isElementNode()) | 356 if (!node->isElementNode()) |
254 continue; | 357 continue; |
255 | 358 |
256 Element& element = toElement(*node); | 359 Element& element = toElement(*node); |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
441 } | 544 } |
442 | 545 |
443 addFontToResources(fontFaceSrcValue->fetch(&document)); | 546 addFontToResources(fontFaceSrcValue->fetch(&document)); |
444 } else if (cssValue->isValueList()) { | 547 } else if (cssValue->isValueList()) { |
445 CSSValueList* cssValueList = toCSSValueList(cssValue); | 548 CSSValueList* cssValueList = toCSSValueList(cssValue); |
446 for (unsigned i = 0; i < cssValueList->length(); i++) | 549 for (unsigned i = 0; i < cssValueList->length(); i++) |
447 retrieveResourcesForCSSValue(cssValueList->item(i), document); | 550 retrieveResourcesForCSSValue(cssValueList->item(i), document); |
448 } | 551 } |
449 } | 552 } |
450 | 553 |
554 void PageSerializer::registerRewriteURL(const String& from, const String& to) | |
555 { | |
556 m_rewriteURLs.set(from, to); | |
557 } | |
558 | |
559 void PageSerializer::setRewriteURLFolder(const String& rewriteFolder) | |
560 { | |
561 m_rewriteFolder = rewriteFolder; | |
562 } | |
563 | |
451 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) | 564 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) |
452 { | 565 { |
453 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); | 566 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); |
454 if (iter != m_blankFrameURLs.end()) | 567 if (iter != m_blankFrameURLs.end()) |
455 return iter->value; | 568 return iter->value; |
456 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); | 569 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); |
457 KURL fakeURL(ParsedURLString, url); | 570 KURL fakeURL(ParsedURLString, url); |
458 m_blankFrameURLs.add(frame, fakeURL); | 571 m_blankFrameURLs.add(frame, fakeURL); |
459 | 572 |
460 return fakeURL; | 573 return fakeURL; |
461 } | 574 } |
462 | 575 |
463 PageSerializer::Delegate* PageSerializer::delegate() | 576 PageSerializer::Delegate* PageSerializer::delegate() |
464 { | 577 { |
465 return m_delegate.get(); | 578 return m_delegate.get(); |
466 } | 579 } |
467 | 580 |
468 } // namespace blink | 581 } // namespace blink |
OLD | NEW |