Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(341)

Side by Side Diff: third_party/WebKit/Source/core/page/PageSerializer.cpp

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Rebasing... Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 if (isHTMLScriptElement(element)) 77 if (isHTMLScriptElement(element))
78 return true; 78 return true;
79 if (isHTMLNoScriptElement(element)) 79 if (isHTMLNoScriptElement(element))
80 return true; 80 return true;
81 return isHTMLMetaElement(element) && toHTMLMetaElement(element).computeEncod ing().isValid(); 81 return isHTMLMetaElement(element) && toHTMLMetaElement(element).computeEncod ing().isValid();
82 } 82 }
83 83
84 class SerializerMarkupAccumulator : public MarkupAccumulator { 84 class SerializerMarkupAccumulator : public MarkupAccumulator {
85 STACK_ALLOCATED(); 85 STACK_ALLOCATED();
86 public: 86 public:
87 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVect or<RawPtrWillBeMember<Node>>&); 87 SerializerMarkupAccumulator(PageSerializer::Delegate&, const Document&, Will BeHeapVector<RawPtrWillBeMember<Node>>&);
88 ~SerializerMarkupAccumulator() override; 88 ~SerializerMarkupAccumulator() override;
89 89
90 protected: 90 protected:
91 void appendText(StringBuilder& out, Text&) override; 91 void appendText(StringBuilder& out, Text&) override;
92 bool shouldIgnoreAttribute(const Attribute&) override; 92 bool shouldIgnoreAttribute(const Attribute&) override;
93 void appendElement(StringBuilder& out, Element&, Namespaces*) override; 93 void appendElement(StringBuilder& out, Element&, Namespaces*) override;
94 void appendAttribute(StringBuilder& out, const Element&, const Attribute&, N amespaces*) override; 94 void appendAttribute(StringBuilder& out, const Element&, const Attribute&, N amespaces*) override;
95 void appendStartTag(Node&, Namespaces* = nullptr) override; 95 void appendStartTag(Node&, Namespaces* = nullptr) override;
96 void appendEndTag(const Element&) override; 96 void appendEndTag(const Element&) override;
97 97
98 private: 98 private:
99 void appendAttributeValue(StringBuilder& out, const String& attributeValue); 99 void appendAttributeValue(StringBuilder& out, const String& attributeValue);
100 void appendRewrittenAttribute( 100 void appendRewrittenAttribute(
101 StringBuilder& out, 101 StringBuilder& out,
102 const Element&, 102 const Element&,
103 const String& attributeName, 103 const String& attributeName,
104 const String& attributeValue); 104 const String& attributeValue);
105 105
106 PageSerializer* m_serializer; 106 PageSerializer::Delegate& m_delegate;
107 RawPtrWillBeMember<const Document> m_document; 107 RawPtrWillBeMember<const Document> m_document;
108 108
109 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document 109 // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document
110 // included into serialized text then extracts image, object, etc. The size 110 // included into serialized text then extracts image, object, etc. The size
111 // of this vector isn't small for large document. It is better to use 111 // of this vector isn't small for large document. It is better to use
112 // callback like functionality. 112 // callback like functionality.
113 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes; 113 WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes;
114 114
115 // Elements with links rewritten via appendAttribute method. 115 // Elements with links rewritten via appendAttribute method.
116 WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewritten Links; 116 WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewritten Links;
117 }; 117 };
118 118
119 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* seriali zer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes ) 119 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer::Delegat e& delegate, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node> >& nodes)
120 : MarkupAccumulator(ResolveAllURLs) 120 : MarkupAccumulator(ResolveAllURLs)
121 , m_serializer(serializer) 121 , m_delegate(delegate)
122 , m_document(&document) 122 , m_document(&document)
123 , m_nodes(nodes) 123 , m_nodes(nodes)
124 { 124 {
125 } 125 }
126 126
127 SerializerMarkupAccumulator::~SerializerMarkupAccumulator() 127 SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
128 { 128 {
129 } 129 }
130 130
131 void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text) 131 void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text)
132 { 132 {
133 Element* parent = text.parentElement(); 133 Element* parent = text.parentElement();
134 if (parent && !shouldIgnoreElement(*parent)) 134 if (parent && !shouldIgnoreElement(*parent))
135 MarkupAccumulator::appendText(result, text); 135 MarkupAccumulator::appendText(result, text);
136 } 136 }
137 137
138 bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribu te) 138 bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribu te)
139 { 139 {
140 PageSerializer::Delegate* delegate = m_serializer->delegate(); 140 return m_delegate.shouldIgnoreAttribute(attribute);
141 if (delegate)
142 return delegate->shouldIgnoreAttribute(attribute);
143
144 return MarkupAccumulator::shouldIgnoreAttribute(attribute);
145 } 141 }
146 142
147 void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces) 143 void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
148 { 144 {
149 if (!shouldIgnoreElement(element)) 145 if (!shouldIgnoreElement(element))
150 MarkupAccumulator::appendElement(result, element, namespaces); 146 MarkupAccumulator::appendElement(result, element, namespaces);
151 147
152 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML 148 // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML
153 if (isHTMLHeadElement(element)) { 149 if (isHTMLHeadElement(element)) {
154 result.appendLiteral("<meta http-equiv=\"Content-Type\" content=\""); 150 result.appendLiteral("<meta http-equiv=\"Content-Type\" content=\"");
(...skipping 14 matching lines...) Expand all
169 const Element& element, 165 const Element& element,
170 const Attribute& attribute, 166 const Attribute& attribute,
171 Namespaces* namespaces) 167 Namespaces* namespaces)
172 { 168 {
173 // Check if link rewriting can affect the attribute. 169 // Check if link rewriting can affect the attribute.
174 bool isLinkAttribute = element.hasLegalLinkAttribute(attribute.name()); 170 bool isLinkAttribute = element.hasLegalLinkAttribute(attribute.name());
175 bool isSrcDocAttribute = isHTMLFrameElementBase(element) 171 bool isSrcDocAttribute = isHTMLFrameElementBase(element)
176 && attribute.name() == HTMLNames::srcdocAttr; 172 && attribute.name() == HTMLNames::srcdocAttr;
177 if (isLinkAttribute || isSrcDocAttribute) { 173 if (isLinkAttribute || isSrcDocAttribute) {
178 // Check if the delegate wants to do link rewriting for the element. 174 // Check if the delegate wants to do link rewriting for the element.
179 PageSerializer::Delegate* delegate = m_serializer->delegate();
180 String newLinkForTheElement; 175 String newLinkForTheElement;
181 if (delegate && delegate->rewriteLink(element, newLinkForTheElement)) { 176 if (m_delegate.rewriteLink(element, newLinkForTheElement)) {
182 if (isLinkAttribute) { 177 if (isLinkAttribute) {
183 // Rewrite element links. 178 // Rewrite element links.
184 appendRewrittenAttribute( 179 appendRewrittenAttribute(
185 out, element, attribute.name().toString(), newLinkForTheElem ent); 180 out, element, attribute.name().toString(), newLinkForTheElem ent);
186 } else { 181 } else {
187 ASSERT(isSrcDocAttribute); 182 ASSERT(isSrcDocAttribute);
188 // Emit src instead of srcdoc attribute for frame elements - we want the 183 // Emit src instead of srcdoc attribute for frame elements - we want the
189 // serialized subframe to use html contents from the link provided by 184 // serialized subframe to use html contents from the link provided by
190 // Delegate::rewriteLink rather than html contents from srcdoc 185 // Delegate::rewriteLink rather than html contents from srcdoc
191 // attribute. 186 // attribute.
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 235
241 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS 236 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS
242 // documents which leads to bugs like <https://crbug.com/251898>. Not being 237 // documents which leads to bugs like <https://crbug.com/251898>. Not being
243 // able to rewrite URLs inside CSS documents means that resources imported from 238 // able to rewrite URLs inside CSS documents means that resources imported from
244 // url(...) statements in CSS might not work when rewriting links for the 239 // url(...) statements in CSS might not work when rewriting links for the
245 // "Webpage, Complete" method of saving a page. It will take some work but it 240 // "Webpage, Complete" method of saving a page. It will take some work but it
246 // needs to be done if we want to continue to support non-MHTML saved pages. 241 // needs to be done if we want to continue to support non-MHTML saved pages.
247 242
248 PageSerializer::PageSerializer( 243 PageSerializer::PageSerializer(
249 Vector<SerializedResource>& resources, 244 Vector<SerializedResource>& resources,
250 Delegate* delegate) 245 Delegate& delegate)
251 : m_resources(&resources) 246 : m_resources(&resources)
252 , m_delegate(delegate) 247 , m_delegate(delegate)
253 { 248 {
254 } 249 }
255 250
256 void PageSerializer::serializeFrame(const LocalFrame& frame) 251 void PageSerializer::serializeFrame(const LocalFrame& frame)
257 { 252 {
258 ASSERT(frame.document()); 253 ASSERT(frame.document());
259 Document& document = *frame.document(); 254 Document& document = *frame.document();
260 KURL url = document.url(); 255 KURL url = document.url();
261 256
262 // If frame is an image document, add the image and don't continue 257 // If frame is an image document, add the image and don't continue
263 if (document.isImageDocument()) { 258 if (document.isImageDocument()) {
264 ImageDocument& imageDocument = toImageDocument(document); 259 ImageDocument& imageDocument = toImageDocument(document);
265 addImageToResources(imageDocument.cachedImage(), url); 260 addImageToResources(imageDocument.cachedImage(), url);
266 return; 261 return;
267 } 262 }
268 263
269 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; 264 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
270 SerializerMarkupAccumulator accumulator(this, document, serializedNodes); 265 SerializerMarkupAccumulator accumulator(m_delegate, document, serializedNode s);
271 String text = serializeNodes<EditingStrategy>(accumulator, document, Include Node); 266 String text = serializeNodes<EditingStrategy>(accumulator, document, Include Node);
272 267
273 CString frameHTML = document.encoding().encode(text, WTF::EntitiesForUnencod ables); 268 CString frameHTML = document.encoding().encode(text, WTF::EntitiesForUnencod ables);
274 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh aredBuffer::create(frameHTML.data(), frameHTML.length()))); 269 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), Sh aredBuffer::create(frameHTML.data(), frameHTML.length())));
275 270
276 for (Node* node: serializedNodes) { 271 for (Node* node: serializedNodes) {
277 ASSERT(node); 272 ASSERT(node);
278 if (!node->isElementNode()) 273 if (!node->isElementNode())
279 continue; 274 continue;
280 275
(...skipping 14 matching lines...) Expand all
295 if (inputElement.type() == InputTypeNames::image && inputElement.ima geLoader()) { 290 if (inputElement.type() == InputTypeNames::image && inputElement.ima geLoader()) {
296 KURL url = inputElement.src(); 291 KURL url = inputElement.src();
297 ImageResource* cachedImage = inputElement.imageLoader()->image() ; 292 ImageResource* cachedImage = inputElement.imageLoader()->image() ;
298 addImageToResources(cachedImage, url); 293 addImageToResources(cachedImage, url);
299 } 294 }
300 } else if (isHTMLLinkElement(element)) { 295 } else if (isHTMLLinkElement(element)) {
301 HTMLLinkElement& linkElement = toHTMLLinkElement(element); 296 HTMLLinkElement& linkElement = toHTMLLinkElement(element);
302 if (CSSStyleSheet* sheet = linkElement.sheet()) { 297 if (CSSStyleSheet* sheet = linkElement.sheet()) {
303 KURL url = document.completeURL(linkElement.getAttribute(HTMLNam es::hrefAttr)); 298 KURL url = document.completeURL(linkElement.getAttribute(HTMLNam es::hrefAttr));
304 serializeCSSStyleSheet(*sheet, url); 299 serializeCSSStyleSheet(*sheet, url);
305 ASSERT(m_resourceURLs.contains(url));
306 } 300 }
307 } else if (isHTMLStyleElement(element)) { 301 } else if (isHTMLStyleElement(element)) {
308 HTMLStyleElement& styleElement = toHTMLStyleElement(element); 302 HTMLStyleElement& styleElement = toHTMLStyleElement(element);
309 if (CSSStyleSheet* sheet = styleElement.sheet()) 303 if (CSSStyleSheet* sheet = styleElement.sheet())
310 serializeCSSStyleSheet(*sheet, KURL()); 304 serializeCSSStyleSheet(*sheet, KURL());
311 } 305 }
312 } 306 }
313 } 307 }
314 308
315 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KUR L& url) 309 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KUR L& url)
316 { 310 {
317 StringBuilder cssText; 311 StringBuilder cssText;
318 cssText.appendLiteral("@charset \""); 312 cssText.appendLiteral("@charset \"");
319 cssText.append(styleSheet.contents()->charset().lower()); 313 cssText.append(styleSheet.contents()->charset().lower());
320 cssText.appendLiteral("\";\n\n"); 314 cssText.appendLiteral("\";\n\n");
321 315
322 for (unsigned i = 0; i < styleSheet.length(); ++i) { 316 for (unsigned i = 0; i < styleSheet.length(); ++i) {
323 CSSRule* rule = styleSheet.item(i); 317 CSSRule* rule = styleSheet.item(i);
324 String itemText = rule->cssText(); 318 String itemText = rule->cssText();
325 if (!itemText.isEmpty()) { 319 if (!itemText.isEmpty()) {
326 cssText.append(itemText); 320 cssText.append(itemText);
327 if (i < styleSheet.length() - 1) 321 if (i < styleSheet.length() - 1)
328 cssText.appendLiteral("\n\n"); 322 cssText.appendLiteral("\n\n");
329 } 323 }
330 324
331 // Some rules have resources associated with them that we need to retrieve. 325 // Some rules have resources associated with them that we need to retrieve.
332 serializeCSSRule(rule); 326 serializeCSSRule(rule);
333 } 327 }
334 328
335 if (url.isValid() && !m_resourceURLs.contains(url)) { 329 if (shouldAddURL(url)) {
336 WTF::TextEncoding textEncoding(styleSheet.contents()->charset()); 330 WTF::TextEncoding textEncoding(styleSheet.contents()->charset());
337 ASSERT(textEncoding.isValid()); 331 ASSERT(textEncoding.isValid());
338 String textString = cssText.toString(); 332 String textString = cssText.toString();
339 CString text = textEncoding.encode(textString, WTF::EntitiesForUnencodab les); 333 CString text = textEncoding.encode(textString, WTF::EntitiesForUnencodab les);
340 m_resources->append(SerializedResource(url, String("text/css"), SharedBu ffer::create(text.data(), text.length()))); 334 m_resources->append(SerializedResource(url, String("text/css"), SharedBu ffer::create(text.data(), text.length())));
341 m_resourceURLs.add(url); 335 m_resourceURLs.add(url);
342 } 336 }
343 } 337 }
344 338
345 void PageSerializer::serializeCSSRule(CSSRule* rule) 339 void PageSerializer::serializeCSSRule(CSSRule* rule)
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 case CSSRule::VIEWPORT_RULE: 379 case CSSRule::VIEWPORT_RULE:
386 break; 380 break;
387 381
388 default: 382 default:
389 ASSERT_NOT_REACHED(); 383 ASSERT_NOT_REACHED();
390 } 384 }
391 } 385 }
392 386
393 bool PageSerializer::shouldAddURL(const KURL& url) 387 bool PageSerializer::shouldAddURL(const KURL& url)
394 { 388 {
395 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData (); 389 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData ()
390 && !m_delegate.shouldSkipResource(url);
396 } 391 }
397 392
398 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url) 393 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
399 { 394 {
400 if (!data) { 395 if (!data) {
401 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data()); 396 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
402 return; 397 return;
403 } 398 }
404 399
405 String mimeType = resource->response().mimeType(); 400 String mimeType = resource->response().mimeType();
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
462 } 457 }
463 458
464 addFontToResources(fontFaceSrcValue->fetch(&document)); 459 addFontToResources(fontFaceSrcValue->fetch(&document));
465 } else if (cssValue->isValueList()) { 460 } else if (cssValue->isValueList()) {
466 CSSValueList* cssValueList = toCSSValueList(cssValue); 461 CSSValueList* cssValueList = toCSSValueList(cssValue);
467 for (unsigned i = 0; i < cssValueList->length(); i++) 462 for (unsigned i = 0; i < cssValueList->length(); i++)
468 retrieveResourcesForCSSValue(cssValueList->item(i), document); 463 retrieveResourcesForCSSValue(cssValueList->item(i), document);
469 } 464 }
470 } 465 }
471 466
472 PageSerializer::Delegate* PageSerializer::delegate()
473 {
474 return m_delegate;
475 }
476
477 // Returns MOTW (Mark of the Web) declaration before html tag which is in 467 // Returns MOTW (Mark of the Web) declaration before html tag which is in
478 // HTML comment, e.g. "<!-- saved from url=(%04d)%s -->" 468 // HTML comment, e.g. "<!-- saved from url=(%04d)%s -->"
479 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. 469 // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
480 String PageSerializer::markOfTheWebDeclaration(const KURL& url) 470 String PageSerializer::markOfTheWebDeclaration(const KURL& url)
481 { 471 {
482 StringBuilder builder; 472 StringBuilder builder;
483 bool emitsMinus = false; 473 bool emitsMinus = false;
484 CString orignalUrl = url.string().ascii(); 474 CString orignalUrl = url.string().ascii();
485 for (const char* string = orignalUrl.data(); *string; ++string) { 475 for (const char* string = orignalUrl.data(); *string; ++string) {
486 const char ch = *string; 476 const char ch = *string;
487 if (ch == '-' && emitsMinus) { 477 if (ch == '-' && emitsMinus) {
488 builder.append("%2D"); 478 builder.append("%2D");
489 emitsMinus = false; 479 emitsMinus = false;
490 continue; 480 continue;
491 } 481 }
492 emitsMinus = ch == '-'; 482 emitsMinus = ch == '-';
493 builder.append(ch); 483 builder.append(ch);
494 } 484 }
495 CString escapedUrl = builder.toString().ascii(); 485 CString escapedUrl = builder.toString().ascii();
496 return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl .length()), escapedUrl.data()); 486 return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl .length()), escapedUrl.data());
497 } 487 }
498 488
499 } // namespace blink 489 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/page/PageSerializer.h ('k') | third_party/WebKit/Source/web/WebPageSerializer.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698