OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 { | 92 { |
93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isC
harsetSpecifyingNode(element); | 93 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isC
harsetSpecifyingNode(element); |
94 } | 94 } |
95 | 95 |
96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerEleme
nt& frameOwner) | 96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerEleme
nt& frameOwner) |
97 { | 97 { |
98 // FIXME: We should support all frame owners including applets. | 98 // FIXME: We should support all frame owners including applets. |
99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::sr
cAttr; | 99 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::sr
cAttr; |
100 } | 100 } |
101 | 101 |
102 class SerializerMarkupAccumulator final : public MarkupAccumulator { | 102 class SerializerMarkupAccumulator : public MarkupAccumulator { |
103 STACK_ALLOCATED(); | 103 STACK_ALLOCATED(); |
104 public: | 104 public: |
105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect
or<RawPtrWillBeMember<Node>>&); | 105 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect
or<RawPtrWillBeMember<Node>>&); |
106 virtual ~SerializerMarkupAccumulator(); | 106 virtual ~SerializerMarkupAccumulator(); |
107 | 107 |
108 protected: | 108 protected: |
109 virtual void appendText(StringBuilder& out, Text&) override; | 109 virtual void appendText(StringBuilder& out, Text&) override; |
110 virtual bool shouldIgnoreAttribute(const Attribute&) override; | 110 virtual bool shouldIgnoreAttribute(const Attribute&) override; |
111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) overri
de; | 111 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) overri
de; |
112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Name
spaces*) override; | 112 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Name
spaces*) override; |
113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; | 113 virtual void appendStartTag(Node&, Namespaces* = nullptr) override; |
114 virtual void appendEndTag(const Element&) override; | 114 virtual void appendEndTag(const Element&) override; |
115 | 115 |
| 116 const Document& document(); |
| 117 |
116 private: | 118 private: |
117 PageSerializer* m_serializer; | 119 PageSerializer* m_serializer; |
118 RawPtrWillBeMember<const Document> m_document; | 120 RawPtrWillBeMember<const Document> m_document; |
119 | 121 |
120 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document | 122 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document |
121 // included into serialized text then extracts image, object, etc. The size | 123 // included into serialized text then extracts image, object, etc. The size |
122 // of this vector isn't small for large document. It is better to use | 124 // of this vector isn't small for large document. It is better to use |
123 // callback like functionality. | 125 // callback like functionality. |
124 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; | 126 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; |
125 }; | 127 }; |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 MarkupAccumulator::appendStartTag(node, namespaces); | 199 MarkupAccumulator::appendStartTag(node, namespaces); |
198 m_nodes.append(&node); | 200 m_nodes.append(&node); |
199 } | 201 } |
200 | 202 |
201 void SerializerMarkupAccumulator::appendEndTag(const Element& element) | 203 void SerializerMarkupAccumulator::appendEndTag(const Element& element) |
202 { | 204 { |
203 if (!shouldIgnoreElement(element)) | 205 if (!shouldIgnoreElement(element)) |
204 MarkupAccumulator::appendEndTag(element); | 206 MarkupAccumulator::appendEndTag(element); |
205 } | 207 } |
206 | 208 |
| 209 const Document& SerializerMarkupAccumulator::document() |
| 210 { |
| 211 return *m_document; |
| 212 } |
| 213 |
| 214 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS |
| 215 // documents which leads to bugs like <https://crbug.com/251898>. Not being |
| 216 // able to rewrite URLs inside CSS documents means that resources imported from |
| 217 // url(...) statements in CSS might not work when rewriting links for the |
| 218 // "Webpage, Complete" method of saving a page. It will take some work but it |
| 219 // needs to be done if we want to continue to support non-MHTML saved pages. |
| 220 // |
| 221 // Once that is fixed it would make sense to make link rewriting a bit more |
| 222 // general. A new method, String& rewriteURL(String&) or similar, could be added |
| 223 // to PageSerializer.Delegate that would allow clients to control this. Some of |
| 224 // the change link logic could be moved back to WebPageSerializer. |
| 225 // |
| 226 // The remaining code in LinkChangeSerializerMarkupAccumulator could probably |
| 227 // be merged back into SerializerMarkupAccumulator with additional methods in |
| 228 // PageSerializer.Delegate to control MOTW and Base tag rewrite. |
| 229 class LinkChangeSerializerMarkupAccumulator final : public SerializerMarkupAccum
ulator { |
| 230 STACK_ALLOCATED(); |
| 231 public: |
| 232 LinkChangeSerializerMarkupAccumulator(PageSerializer*, const Document&, Will
BeHeapVector<RawPtrWillBeMember<Node>>&, HashMap<String, String>& rewriteURLs, c
onst String& rewriteFolder); |
| 233 |
| 234 private: |
| 235 void appendElement(StringBuilder&, Element&, Namespaces*) override; |
| 236 void appendAttribute(StringBuilder&, const Element&, const Attribute&, Names
paces*) override; |
| 237 |
| 238 // m_rewriteURLs include all pairs of local resource paths and corresponding
original links. |
| 239 HashMap<String, String> m_rewriteURLs; |
| 240 String m_rewriteFolder; |
| 241 }; |
| 242 |
| 243 LinkChangeSerializerMarkupAccumulator::LinkChangeSerializerMarkupAccumulator(Pag
eSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBe
Member<Node>>& nodes, HashMap<String, String>& rewriteURLs, const String& rewrit
eFolder) |
| 244 : SerializerMarkupAccumulator(serializer, document, nodes) |
| 245 , m_rewriteURLs(rewriteURLs) |
| 246 , m_rewriteFolder(rewriteFolder) |
| 247 { |
| 248 } |
| 249 |
| 250 void LinkChangeSerializerMarkupAccumulator::appendElement(StringBuilder& result,
Element& element, Namespaces* namespaces) |
| 251 { |
| 252 if (element.hasTagName(HTMLNames::htmlTag)) { |
| 253 // Add MOTW (Mark of the Web) declaration before html tag. |
| 254 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. |
| 255 result.append('\n'); |
| 256 MarkupFormatter::appendComment(result, String::format(" saved from url=(
%04d)%s ", |
| 257 static_cast<int>(document().url().string().utf8().length()), |
| 258 document().url().string().utf8().data())); |
| 259 result.append('\n'); |
| 260 } |
| 261 |
| 262 if (element.hasTagName(HTMLNames::baseTag)) { |
| 263 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an
element like this, without special cases for XHTML |
| 264 // Append a new base tag declaration. |
| 265 result.appendLiteral("<base href=\".\""); |
| 266 if (!document().baseTarget().isEmpty()) { |
| 267 result.appendLiteral(" target=\""); |
| 268 MarkupFormatter::appendAttributeValue(result, document().baseTarget(
), document().isHTMLDocument()); |
| 269 result.append('"'); |
| 270 } |
| 271 if (document().isXHTMLDocument()) |
| 272 result.appendLiteral(" />"); |
| 273 else |
| 274 result.appendLiteral(">"); |
| 275 } else { |
| 276 SerializerMarkupAccumulator::appendElement(result, element, namespaces); |
| 277 } |
| 278 } |
| 279 |
| 280 void LinkChangeSerializerMarkupAccumulator::appendAttribute(StringBuilder& resul
t, const Element& element, const Attribute& attribute, Namespaces* namespaces) |
| 281 { |
| 282 if (!m_rewriteURLs.isEmpty() && element.isURLAttribute(attribute)) { |
| 283 |
| 284 String completeURL = document().completeURL(attribute.value()); |
| 285 |
| 286 if (m_rewriteURLs.contains(completeURL)) { |
| 287 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append
an attribute like this. |
| 288 result.append(' '); |
| 289 result.append(attribute.name().toString()); |
| 290 result.appendLiteral("=\""); |
| 291 if (!m_rewriteFolder.isEmpty()) |
| 292 MarkupFormatter::appendAttributeValue(result, m_rewriteFolder +
"/", document().isHTMLDocument()); |
| 293 MarkupFormatter::appendAttributeValue(result, m_rewriteURLs.get(comp
leteURL), document().isHTMLDocument()); |
| 294 result.appendLiteral("\""); |
| 295 return; |
| 296 } |
| 297 } |
| 298 MarkupAccumulator::appendAttribute(result, element, attribute, namespaces); |
| 299 } |
| 300 |
| 301 |
207 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr
<Delegate> delegate) | 302 PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr
<Delegate> delegate) |
208 : m_resources(resources) | 303 : m_resources(resources) |
209 , m_blankFrameCounter(0) | 304 , m_blankFrameCounter(0) |
210 , m_delegate(delegate) | 305 , m_delegate(delegate) |
211 { | 306 { |
212 } | 307 } |
213 | 308 |
214 void PageSerializer::serialize(Page* page) | 309 void PageSerializer::serialize(Page* page) |
215 { | 310 { |
216 serializeFrame(page->deprecatedLocalMainFrame()); | 311 serializeFrame(page->deprecatedLocalMainFrame()); |
(...skipping 18 matching lines...) Expand all Loading... |
235 } | 330 } |
236 | 331 |
237 // If frame is an image document, add the image and don't continue | 332 // If frame is an image document, add the image and don't continue |
238 if (document.isImageDocument()) { | 333 if (document.isImageDocument()) { |
239 ImageDocument& imageDocument = toImageDocument(document); | 334 ImageDocument& imageDocument = toImageDocument(document); |
240 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElem
ent()->layoutObject(), url); | 335 addImageToResources(imageDocument.cachedImage(), imageDocument.imageElem
ent()->layoutObject(), url); |
241 return; | 336 return; |
242 } | 337 } |
243 | 338 |
244 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; | 339 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; |
245 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); | 340 String text; |
246 String text = serializeNodes<EditingStrategy>(accumulator, document, Include
Node); | 341 if (!m_rewriteURLs.isEmpty()) { |
| 342 LinkChangeSerializerMarkupAccumulator accumulator(this, document, serial
izedNodes, m_rewriteURLs, m_rewriteFolder); |
| 343 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNod
e); |
| 344 } else { |
| 345 SerializerMarkupAccumulator accumulator(this, document, serializedNodes)
; |
| 346 text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNod
e); |
| 347 } |
| 348 |
247 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::Entiti
esForUnencodables); | 349 CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::Entiti
esForUnencodables); |
248 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh
aredBuffer::create(frameHTML.data(), frameHTML.length()))); | 350 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh
aredBuffer::create(frameHTML.data(), frameHTML.length()))); |
249 m_resourceURLs.add(url); | 351 m_resourceURLs.add(url); |
250 | 352 |
251 for (Node* node: serializedNodes) { | 353 for (Node* node: serializedNodes) { |
252 ASSERT(node); | 354 ASSERT(node); |
253 if (!node->isElementNode()) | 355 if (!node->isElementNode()) |
254 continue; | 356 continue; |
255 | 357 |
256 Element& element = toElement(*node); | 358 Element& element = toElement(*node); |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
441 } | 543 } |
442 | 544 |
443 addFontToResources(fontFaceSrcValue->fetch(&document)); | 545 addFontToResources(fontFaceSrcValue->fetch(&document)); |
444 } else if (cssValue->isValueList()) { | 546 } else if (cssValue->isValueList()) { |
445 CSSValueList* cssValueList = toCSSValueList(cssValue); | 547 CSSValueList* cssValueList = toCSSValueList(cssValue); |
446 for (unsigned i = 0; i < cssValueList->length(); i++) | 548 for (unsigned i = 0; i < cssValueList->length(); i++) |
447 retrieveResourcesForCSSValue(cssValueList->item(i), document); | 549 retrieveResourcesForCSSValue(cssValueList->item(i), document); |
448 } | 550 } |
449 } | 551 } |
450 | 552 |
| 553 void PageSerializer::registerRewriteURL(const String& from, const String& to) |
| 554 { |
| 555 m_rewriteURLs.set(from, to); |
| 556 } |
| 557 |
| 558 void PageSerializer::setRewriteURLFolder(const String& rewriteFolder) |
| 559 { |
| 560 m_rewriteFolder = rewriteFolder; |
| 561 } |
| 562 |
451 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) | 563 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) |
452 { | 564 { |
453 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); | 565 BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); |
454 if (iter != m_blankFrameURLs.end()) | 566 if (iter != m_blankFrameURLs.end()) |
455 return iter->value; | 567 return iter->value; |
456 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); | 568 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++); |
457 KURL fakeURL(ParsedURLString, url); | 569 KURL fakeURL(ParsedURLString, url); |
458 m_blankFrameURLs.add(frame, fakeURL); | 570 m_blankFrameURLs.add(frame, fakeURL); |
459 | 571 |
460 return fakeURL; | 572 return fakeURL; |
461 } | 573 } |
462 | 574 |
463 PageSerializer::Delegate* PageSerializer::delegate() | 575 PageSerializer::Delegate* PageSerializer::delegate() |
464 { | 576 { |
465 return m_delegate.get(); | 577 return m_delegate.get(); |
466 } | 578 } |
467 | 579 |
468 } // namespace blink | 580 } // namespace blink |
OLD | NEW |