OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 if (isHTMLScriptElement(element)) | 77 if (isHTMLScriptElement(element)) |
78 return true; | 78 return true; |
79 if (isHTMLNoScriptElement(element)) | 79 if (isHTMLNoScriptElement(element)) |
80 return true; | 80 return true; |
81 return isHTMLMetaElement(element) && toHTMLMetaElement(element).computeEncod
ing().isValid(); | 81 return isHTMLMetaElement(element) && toHTMLMetaElement(element).computeEncod
ing().isValid(); |
82 } | 82 } |
83 | 83 |
84 class SerializerMarkupAccumulator : public MarkupAccumulator { | 84 class SerializerMarkupAccumulator : public MarkupAccumulator { |
85 STACK_ALLOCATED(); | 85 STACK_ALLOCATED(); |
86 public: | 86 public: |
87 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect
or<RawPtrWillBeMember<Node>>&); | 87 SerializerMarkupAccumulator(PageSerializer::Delegate&, const Document&, Will
BeHeapVector<RawPtrWillBeMember<Node>>&); |
88 ~SerializerMarkupAccumulator() override; | 88 ~SerializerMarkupAccumulator() override; |
89 | 89 |
90 protected: | 90 protected: |
91 void appendText(StringBuilder& out, Text&) override; | 91 void appendText(StringBuilder& out, Text&) override; |
92 bool shouldIgnoreAttribute(const Attribute&) override; | 92 bool shouldIgnoreAttribute(const Attribute&) override; |
93 void appendElement(StringBuilder& out, Element&, Namespaces*) override; | 93 void appendElement(StringBuilder& out, Element&, Namespaces*) override; |
94 void appendAttribute(StringBuilder& out, const Element&, const Attribute&, N
amespaces*) override; | 94 void appendAttribute(StringBuilder& out, const Element&, const Attribute&, N
amespaces*) override; |
95 void appendStartTag(Node&, Namespaces* = nullptr) override; | 95 void appendStartTag(Node&, Namespaces* = nullptr) override; |
96 void appendEndTag(const Element&) override; | 96 void appendEndTag(const Element&) override; |
97 | 97 |
98 private: | 98 private: |
99 void appendAttributeValue(StringBuilder& out, const String& attributeValue); | 99 void appendAttributeValue(StringBuilder& out, const String& attributeValue); |
100 void appendRewrittenAttribute( | 100 void appendRewrittenAttribute( |
101 StringBuilder& out, | 101 StringBuilder& out, |
102 const Element&, | 102 const Element&, |
103 const String& attributeName, | 103 const String& attributeName, |
104 const String& attributeValue); | 104 const String& attributeValue); |
105 | 105 |
106 PageSerializer* m_serializer; | 106 PageSerializer::Delegate& m_delegate; |
107 RawPtrWillBeMember<const Document> m_document; | 107 RawPtrWillBeMember<const Document> m_document; |
108 | 108 |
109 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document | 109 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document |
110 // included into serialized text then extracts image, object, etc. The size | 110 // included into serialized text then extracts image, object, etc. The size |
111 // of this vector isn't small for large document. It is better to use | 111 // of this vector isn't small for large document. It is better to use |
112 // callback like functionality. | 112 // callback like functionality. |
113 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; | 113 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; |
114 | 114 |
115 // Elements with links rewritten via appendAttribute method. | 115 // Elements with links rewritten via appendAttribute method. |
116 WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewritten
Links; | 116 WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewritten
Links; |
117 }; | 117 }; |
118 | 118 |
119 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* seriali
zer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes
) | 119 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer::Delegat
e& delegate, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>
>& nodes) |
120 : MarkupAccumulator(ResolveAllURLs) | 120 : MarkupAccumulator(ResolveAllURLs) |
121 , m_serializer(serializer) | 121 , m_delegate(delegate) |
122 , m_document(&document) | 122 , m_document(&document) |
123 , m_nodes(nodes) | 123 , m_nodes(nodes) |
124 { | 124 { |
125 } | 125 } |
126 | 126 |
127 SerializerMarkupAccumulator::~SerializerMarkupAccumulator() | 127 SerializerMarkupAccumulator::~SerializerMarkupAccumulator() |
128 { | 128 { |
129 } | 129 } |
130 | 130 |
131 void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text) | 131 void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text) |
132 { | 132 { |
133 Element* parent = text.parentElement(); | 133 Element* parent = text.parentElement(); |
134 if (parent && !shouldIgnoreElement(*parent)) | 134 if (parent && !shouldIgnoreElement(*parent)) |
135 MarkupAccumulator::appendText(result, text); | 135 MarkupAccumulator::appendText(result, text); |
136 } | 136 } |
137 | 137 |
138 bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribu
te) | 138 bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribu
te) |
139 { | 139 { |
140 PageSerializer::Delegate* delegate = m_serializer->delegate(); | 140 return m_delegate.shouldIgnoreAttribute(attribute); |
141 if (delegate) | |
142 return delegate->shouldIgnoreAttribute(attribute); | |
143 | |
144 return MarkupAccumulator::shouldIgnoreAttribute(attribute); | |
145 } | 141 } |
146 | 142 |
147 void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element&
element, Namespaces* namespaces) | 143 void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element&
element, Namespaces* namespaces) |
148 { | 144 { |
149 if (!shouldIgnoreElement(element)) | 145 if (!shouldIgnoreElement(element)) |
150 MarkupAccumulator::appendElement(result, element, namespaces); | 146 MarkupAccumulator::appendElement(result, element, namespaces); |
151 | 147 |
152 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an elem
ent like this, without special cases for XHTML | 148 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an elem
ent like this, without special cases for XHTML |
153 if (isHTMLHeadElement(element)) { | 149 if (isHTMLHeadElement(element)) { |
154 result.appendLiteral("<meta http-equiv=\"Content-Type\" content=\""); | 150 result.appendLiteral("<meta http-equiv=\"Content-Type\" content=\""); |
(...skipping 14 matching lines...) Expand all Loading... |
169 const Element& element, | 165 const Element& element, |
170 const Attribute& attribute, | 166 const Attribute& attribute, |
171 Namespaces* namespaces) | 167 Namespaces* namespaces) |
172 { | 168 { |
173 // Check if link rewriting can affect the attribute. | 169 // Check if link rewriting can affect the attribute. |
174 bool isLinkAttribute = element.hasLegalLinkAttribute(attribute.name()); | 170 bool isLinkAttribute = element.hasLegalLinkAttribute(attribute.name()); |
175 bool isSrcDocAttribute = isHTMLFrameElementBase(element) | 171 bool isSrcDocAttribute = isHTMLFrameElementBase(element) |
176 && attribute.name() == HTMLNames::srcdocAttr; | 172 && attribute.name() == HTMLNames::srcdocAttr; |
177 if (isLinkAttribute || isSrcDocAttribute) { | 173 if (isLinkAttribute || isSrcDocAttribute) { |
178 // Check if the delegate wants to do link rewriting for the element. | 174 // Check if the delegate wants to do link rewriting for the element. |
179 PageSerializer::Delegate* delegate = m_serializer->delegate(); | |
180 String newLinkForTheElement; | 175 String newLinkForTheElement; |
181 if (delegate && delegate->rewriteLink(element, newLinkForTheElement)) { | 176 if (m_delegate.rewriteLink(element, newLinkForTheElement)) { |
182 if (isLinkAttribute) { | 177 if (isLinkAttribute) { |
183 // Rewrite element links. | 178 // Rewrite element links. |
184 appendRewrittenAttribute( | 179 appendRewrittenAttribute( |
185 out, element, attribute.name().toString(), newLinkForTheElem
ent); | 180 out, element, attribute.name().toString(), newLinkForTheElem
ent); |
186 } else { | 181 } else { |
187 ASSERT(isSrcDocAttribute); | 182 ASSERT(isSrcDocAttribute); |
188 // Emit src instead of srcdoc attribute for frame elements - we
want the | 183 // Emit src instead of srcdoc attribute for frame elements - we
want the |
189 // serialized subframe to use html contents from the link provid
ed by | 184 // serialized subframe to use html contents from the link provid
ed by |
190 // Delegate::rewriteLink rather than html contents from srcdoc | 185 // Delegate::rewriteLink rather than html contents from srcdoc |
191 // attribute. | 186 // attribute. |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 | 235 |
241 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS | 236 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS |
242 // documents which leads to bugs like <https://crbug.com/251898>. Not being | 237 // documents which leads to bugs like <https://crbug.com/251898>. Not being |
243 // able to rewrite URLs inside CSS documents means that resources imported from | 238 // able to rewrite URLs inside CSS documents means that resources imported from |
244 // url(...) statements in CSS might not work when rewriting links for the | 239 // url(...) statements in CSS might not work when rewriting links for the |
245 // "Webpage, Complete" method of saving a page. It will take some work but it | 240 // "Webpage, Complete" method of saving a page. It will take some work but it |
246 // needs to be done if we want to continue to support non-MHTML saved pages. | 241 // needs to be done if we want to continue to support non-MHTML saved pages. |
247 | 242 |
248 PageSerializer::PageSerializer( | 243 PageSerializer::PageSerializer( |
249 Vector<SerializedResource>& resources, | 244 Vector<SerializedResource>& resources, |
250 Delegate* delegate) | 245 Delegate& delegate) |
251 : m_resources(&resources) | 246 : m_resources(&resources) |
252 , m_delegate(delegate) | 247 , m_delegate(delegate) |
253 { | 248 { |
254 } | 249 } |
255 | 250 |
256 void PageSerializer::serializeFrame(const LocalFrame& frame) | 251 void PageSerializer::serializeFrame(const LocalFrame& frame) |
257 { | 252 { |
258 ASSERT(frame.document()); | 253 ASSERT(frame.document()); |
259 Document& document = *frame.document(); | 254 Document& document = *frame.document(); |
260 KURL url = document.url(); | 255 KURL url = document.url(); |
261 | 256 |
262 // If frame is an image document, add the image and don't continue | 257 // If frame is an image document, add the image and don't continue |
263 if (document.isImageDocument()) { | 258 if (document.isImageDocument()) { |
264 ImageDocument& imageDocument = toImageDocument(document); | 259 ImageDocument& imageDocument = toImageDocument(document); |
265 addImageToResources(imageDocument.cachedImage(), url); | 260 addImageToResources(imageDocument.cachedImage(), url); |
266 return; | 261 return; |
267 } | 262 } |
268 | 263 |
269 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; | 264 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; |
270 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); | 265 SerializerMarkupAccumulator accumulator(m_delegate, document, serializedNode
s); |
271 String text = serializeNodes<EditingStrategy>(accumulator, document, Include
Node); | 266 String text = serializeNodes<EditingStrategy>(accumulator, document, Include
Node); |
272 | 267 |
273 CString frameHTML = document.encoding().encode(text, WTF::EntitiesForUnencod
ables); | 268 CString frameHTML = document.encoding().encode(text, WTF::EntitiesForUnencod
ables); |
274 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh
aredBuffer::create(frameHTML.data(), frameHTML.length()))); | 269 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh
aredBuffer::create(frameHTML.data(), frameHTML.length()))); |
275 | 270 |
276 for (Node* node: serializedNodes) { | 271 for (Node* node: serializedNodes) { |
277 ASSERT(node); | 272 ASSERT(node); |
278 if (!node->isElementNode()) | 273 if (!node->isElementNode()) |
279 continue; | 274 continue; |
280 | 275 |
(...skipping 14 matching lines...) Expand all Loading... |
295 if (inputElement.type() == InputTypeNames::image && inputElement.ima
geLoader()) { | 290 if (inputElement.type() == InputTypeNames::image && inputElement.ima
geLoader()) { |
296 KURL url = inputElement.src(); | 291 KURL url = inputElement.src(); |
297 ImageResource* cachedImage = inputElement.imageLoader()->image()
; | 292 ImageResource* cachedImage = inputElement.imageLoader()->image()
; |
298 addImageToResources(cachedImage, url); | 293 addImageToResources(cachedImage, url); |
299 } | 294 } |
300 } else if (isHTMLLinkElement(element)) { | 295 } else if (isHTMLLinkElement(element)) { |
301 HTMLLinkElement& linkElement = toHTMLLinkElement(element); | 296 HTMLLinkElement& linkElement = toHTMLLinkElement(element); |
302 if (CSSStyleSheet* sheet = linkElement.sheet()) { | 297 if (CSSStyleSheet* sheet = linkElement.sheet()) { |
303 KURL url = document.completeURL(linkElement.getAttribute(HTMLNam
es::hrefAttr)); | 298 KURL url = document.completeURL(linkElement.getAttribute(HTMLNam
es::hrefAttr)); |
304 serializeCSSStyleSheet(*sheet, url); | 299 serializeCSSStyleSheet(*sheet, url); |
305 ASSERT(m_resourceURLs.contains(url)); | |
306 } | 300 } |
307 } else if (isHTMLStyleElement(element)) { | 301 } else if (isHTMLStyleElement(element)) { |
308 HTMLStyleElement& styleElement = toHTMLStyleElement(element); | 302 HTMLStyleElement& styleElement = toHTMLStyleElement(element); |
309 if (CSSStyleSheet* sheet = styleElement.sheet()) | 303 if (CSSStyleSheet* sheet = styleElement.sheet()) |
310 serializeCSSStyleSheet(*sheet, KURL()); | 304 serializeCSSStyleSheet(*sheet, KURL()); |
311 } | 305 } |
312 } | 306 } |
313 } | 307 } |
314 | 308 |
315 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KUR
L& url) | 309 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KUR
L& url) |
316 { | 310 { |
317 StringBuilder cssText; | 311 StringBuilder cssText; |
318 cssText.appendLiteral("@charset \""); | 312 cssText.appendLiteral("@charset \""); |
319 cssText.append(styleSheet.contents()->charset().lower()); | 313 cssText.append(styleSheet.contents()->charset().lower()); |
320 cssText.appendLiteral("\";\n\n"); | 314 cssText.appendLiteral("\";\n\n"); |
321 | 315 |
322 for (unsigned i = 0; i < styleSheet.length(); ++i) { | 316 for (unsigned i = 0; i < styleSheet.length(); ++i) { |
323 CSSRule* rule = styleSheet.item(i); | 317 CSSRule* rule = styleSheet.item(i); |
324 String itemText = rule->cssText(); | 318 String itemText = rule->cssText(); |
325 if (!itemText.isEmpty()) { | 319 if (!itemText.isEmpty()) { |
326 cssText.append(itemText); | 320 cssText.append(itemText); |
327 if (i < styleSheet.length() - 1) | 321 if (i < styleSheet.length() - 1) |
328 cssText.appendLiteral("\n\n"); | 322 cssText.appendLiteral("\n\n"); |
329 } | 323 } |
330 | 324 |
331 // Some rules have resources associated with them that we need to retrie
ve. | 325 // Some rules have resources associated with them that we need to retrie
ve. |
332 serializeCSSRule(rule); | 326 serializeCSSRule(rule); |
333 } | 327 } |
334 | 328 |
335 if (url.isValid() && !m_resourceURLs.contains(url)) { | 329 if (shouldAddURL(url)) { |
336 WTF::TextEncoding textEncoding(styleSheet.contents()->charset()); | 330 WTF::TextEncoding textEncoding(styleSheet.contents()->charset()); |
337 ASSERT(textEncoding.isValid()); | 331 ASSERT(textEncoding.isValid()); |
338 String textString = cssText.toString(); | 332 String textString = cssText.toString(); |
339 CString text = textEncoding.encode(textString, WTF::EntitiesForUnencodab
les); | 333 CString text = textEncoding.encode(textString, WTF::EntitiesForUnencodab
les); |
340 m_resources->append(SerializedResource(url, String("text/css"), SharedBu
ffer::create(text.data(), text.length()))); | 334 m_resources->append(SerializedResource(url, String("text/css"), SharedBu
ffer::create(text.data(), text.length()))); |
341 m_resourceURLs.add(url); | 335 m_resourceURLs.add(url); |
342 } | 336 } |
343 } | 337 } |
344 | 338 |
345 void PageSerializer::serializeCSSRule(CSSRule* rule) | 339 void PageSerializer::serializeCSSRule(CSSRule* rule) |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
385 case CSSRule::VIEWPORT_RULE: | 379 case CSSRule::VIEWPORT_RULE: |
386 break; | 380 break; |
387 | 381 |
388 default: | 382 default: |
389 ASSERT_NOT_REACHED(); | 383 ASSERT_NOT_REACHED(); |
390 } | 384 } |
391 } | 385 } |
392 | 386 |
393 bool PageSerializer::shouldAddURL(const KURL& url) | 387 bool PageSerializer::shouldAddURL(const KURL& url) |
394 { | 388 { |
395 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData
(); | 389 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData
() |
| 390 && !m_delegate.shouldSkipResource(url); |
396 } | 391 } |
397 | 392 |
398 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer>
data, const KURL& url) | 393 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer>
data, const KURL& url) |
399 { | 394 { |
400 if (!data) { | 395 if (!data) { |
401 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data()); | 396 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data()); |
402 return; | 397 return; |
403 } | 398 } |
404 | 399 |
405 String mimeType = resource->response().mimeType(); | 400 String mimeType = resource->response().mimeType(); |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
462 } | 457 } |
463 | 458 |
464 addFontToResources(fontFaceSrcValue->fetch(&document)); | 459 addFontToResources(fontFaceSrcValue->fetch(&document)); |
465 } else if (cssValue->isValueList()) { | 460 } else if (cssValue->isValueList()) { |
466 CSSValueList* cssValueList = toCSSValueList(cssValue); | 461 CSSValueList* cssValueList = toCSSValueList(cssValue); |
467 for (unsigned i = 0; i < cssValueList->length(); i++) | 462 for (unsigned i = 0; i < cssValueList->length(); i++) |
468 retrieveResourcesForCSSValue(cssValueList->item(i), document); | 463 retrieveResourcesForCSSValue(cssValueList->item(i), document); |
469 } | 464 } |
470 } | 465 } |
471 | 466 |
472 PageSerializer::Delegate* PageSerializer::delegate() | |
473 { | |
474 return m_delegate; | |
475 } | |
476 | |
477 // Returns MOTW (Mark of the Web) declaration before html tag which is in | 467 // Returns MOTW (Mark of the Web) declaration before html tag which is in |
478 // HTML comment, e.g. "<!-- saved from url=(%04d)%s -->" | 468 // HTML comment, e.g. "<!-- saved from url=(%04d)%s -->" |
479 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. | 469 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. |
480 String PageSerializer::markOfTheWebDeclaration(const KURL& url) | 470 String PageSerializer::markOfTheWebDeclaration(const KURL& url) |
481 { | 471 { |
482 StringBuilder builder; | 472 StringBuilder builder; |
483 bool emitsMinus = false; | 473 bool emitsMinus = false; |
484 CString orignalUrl = url.string().ascii(); | 474 CString orignalUrl = url.string().ascii(); |
485 for (const char* string = orignalUrl.data(); *string; ++string) { | 475 for (const char* string = orignalUrl.data(); *string; ++string) { |
486 const char ch = *string; | 476 const char ch = *string; |
487 if (ch == '-' && emitsMinus) { | 477 if (ch == '-' && emitsMinus) { |
488 builder.append("%2D"); | 478 builder.append("%2D"); |
489 emitsMinus = false; | 479 emitsMinus = false; |
490 continue; | 480 continue; |
491 } | 481 } |
492 emitsMinus = ch == '-'; | 482 emitsMinus = ch == '-'; |
493 builder.append(ch); | 483 builder.append(ch); |
494 } | 484 } |
495 CString escapedUrl = builder.toString().ascii(); | 485 CString escapedUrl = builder.toString().ascii(); |
496 return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl
.length()), escapedUrl.data()); | 486 return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl
.length()), escapedUrl.data()); |
497 } | 487 } |
498 | 488 |
499 } // namespace blink | 489 } // namespace blink |
OLD | NEW |