Index: Source/core/page/PageSerializer.cpp |
diff --git a/Source/core/page/PageSerializer.cpp b/Source/core/page/PageSerializer.cpp |
index f213024e85b45d6a60a00f0925a4b7920091a076..c72fd528a37e717cc8792894924f47a10d5c4a73 100644 |
--- a/Source/core/page/PageSerializer.cpp |
+++ b/Source/core/page/PageSerializer.cpp |
@@ -87,18 +87,13 @@ static bool isCharsetSpecifyingNode(const Node& node) |
return textEncoding.isValid(); |
} |
-static bool shouldIgnoreElement(const Element& element) |
-{ |
- return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element); |
-} |
- |
// Returns the name of the attribute used for a frame owner's URL: the |
// "data" attribute for <object> elements, "src" for every other owner. |
static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner) |
{ |
// FIXME: We should support all frame owners including applets. |
return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr; |
} |
-class SerializerMarkupAccumulator final : public MarkupAccumulator { |
+class SerializerMarkupAccumulator : public MarkupAccumulator { |
STACK_ALLOCATED(); |
public: |
SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&); |
@@ -112,6 +107,11 @@ protected: |
virtual void appendStartTag(Node&, Namespaces* = nullptr) override; |
virtual void appendEndTag(const Element&) override; |
+ virtual bool shouldIgnoreElement(const Element&) const; |
+ |
+ PageSerializer* pageSerializer(); |
philipj_slow
2015/06/26 09:07:55
I can't see this actually used in the patch... me
Tiger (Sony Mobile)
2015/06/30 11:31:52
And now, neither can I. Removing.
|
+ const Document& document(); |
+ |
private: |
PageSerializer* m_serializer; |
RawPtrWillBeMember<const Document> m_document; |
@@ -203,6 +203,101 @@ void SerializerMarkupAccumulator::appendEndTag(const Element& element) |
MarkupAccumulator::appendEndTag(element); |
} |
+bool SerializerMarkupAccumulator::shouldIgnoreElement(const Element& element) const |
+{ |
+ return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element); |
+} |
+ |
+PageSerializer* SerializerMarkupAccumulator::pageSerializer() |
+{ |
+ return m_serializer; |
+} |
+ |
+const Document& SerializerMarkupAccumulator::document() |
+{ |
+ return *m_document; |
+} |
+ |
+class LinkChangeSerializerMarkupAccumulator final : public SerializerMarkupAccumulator { |
philipj_slow
2015/06/26 09:07:55
Should this be STACK_ALLOCATED()?
Tiger (Sony Mobile)
2015/06/30 11:31:53
Probably, yes. Adding.
|
+public: |
+ LinkChangeSerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&, HashMap<String, String>& rewriteURLs, const String& rewriteFolder); |
+ |
+private: |
+ void appendElement(StringBuilder&, Element&, Namespaces*) override; |
+ void appendAttribute(StringBuilder&, const Element&, const Attribute&, Namespaces*) override; |
+ |
+ bool shouldIgnoreElement(const Element&) const override; |
+ |
+ // m_rewriteURLs includes all pairs of local resource paths and corresponding original links.
philipj_slow
2015/06/26 09:07:56
Plural pairs, paths and links.
Tiger (Sony Mobile)
2015/06/30 11:31:52
Done.
|
+ HashMap<String, String> m_rewriteURLs; |
+ String m_rewriteFolder; |
+}; |
+ |
+LinkChangeSerializerMarkupAccumulator::LinkChangeSerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes, HashMap<String, String>& rewriteURLs, const String& rewriteFolder) |
philipj_slow
2015/06/26 09:07:55
Double space on this line.
Tiger (Sony Mobile)
2015/06/30 11:31:53
Done.
|
+ : SerializerMarkupAccumulator(serializer, document, nodes) |
+ , m_rewriteURLs(rewriteURLs) |
+ , m_rewriteFolder(rewriteFolder) |
+{ |
+} |
+ |
+void LinkChangeSerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces) |
+{ |
+ if (element.hasTagName(HTMLNames::htmlTag)) { |
+ // Add the MOTW (Mark of the Web) declaration before the html tag. |
+ // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. |
philipj_slow
2015/06/26 09:07:56
This format is wonderfully strange :)
Tiger (Sony Mobile)
2015/06/30 11:31:52
It is :)
|
+ result.append('\n'); |
+ MarkupFormatter::appendComment(result, String::format(" saved from url=(%04d)%s ", |
+ static_cast<int>(document().url().string().utf8().length()), |
philipj_slow
2015/06/26 09:07:56
Can you use %u or whatever the type is to avoid th
Tiger (Sony Mobile)
2015/06/30 11:31:53
Is there a format like PRIuS from format_macros.h
philipj_slow
2015/06/30 11:56:27
I guess the assumption is that the URL will be 7-b
Tiger (Sony Mobile)
2015/06/30 12:59:02
That sounds more reasonable yes, and it seems that
|
+ document().url().string().utf8().data())); |
+ result.append('\n'); |
+ } |
+ |
+ SerializerMarkupAccumulator::appendElement(result, element, namespaces); |
+ |
+ if (element.hasTagName(HTMLNames::baseTag)) { |
+ // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML. |
+ // Append a new base tag declaration. |
philipj_slow
2015/06/26 09:07:56
I'm not sure I understand this. Is the main thing
Tiger (Sony Mobile)
2015/06/30 11:31:53
The main thing is to change the URL. This could be
philipj_slow
2015/06/30 11:56:27
The original code for this is WebPageSerializerImp
Tiger (Sony Mobile)
2015/06/30 12:59:02
I think it would make sense, and it would work for
Tiger (Sony Mobile)
2015/07/01 12:14:46
I've added a small TODO section about this.
|
+ result.appendLiteral("<base href=\".\""); |
+ if (!document().baseTarget().isEmpty()) { |
+ result.appendLiteral(" target=\""); |
+ MarkupFormatter::appendAttributeValue(result, document().baseTarget(), document().isHTMLDocument()); |
+ result.append('"'); |
+ } |
+ if (document().isXHTMLDocument()) |
+ result.appendLiteral(" />"); |
+ else |
+ result.appendLiteral(">"); |
+ } |
+} |
+ |
+void LinkChangeSerializerMarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces) |
+{ |
+ if (!m_rewriteURLs.isEmpty() && element.isURLAttribute(attribute) && !element.isJavaScriptURLAttribute(attribute)) { |
philipj_slow
2015/06/26 09:07:56
Does the javascript: case matter here? There's no
Tiger (Sony Mobile)
2015/06/30 11:31:52
It doesn't really matter no, it was just added to
philipj_slow
2015/06/30 11:56:27
Forgot to remove it, or do you mean to do it later
Tiger (Sony Mobile)
2015/06/30 12:59:02
Removing now.
|
+ |
+ String completeURL = document().completeURL(attribute.value()); |
+ |
+ if (m_rewriteURLs.contains(completeURL)) { |
+ // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this. |
philipj_slow
2015/06/26 09:07:55
Do you mean to refactor this so that one can rewri
Tiger (Sony Mobile)
2015/06/30 11:31:53
Exactly so.
|
+ result.append(' '); |
+ result.append(attribute.name().toString()); |
+ result.appendLiteral("=\""); |
+ if (!m_rewriteFolder.isEmpty()) { |
philipj_slow
2015/06/26 09:07:56
Don't need the {} here.
Tiger (Sony Mobile)
2015/06/30 11:31:52
Done.
|
+ MarkupFormatter::appendAttributeValue(result, m_rewriteFolder + "/", document().isHTMLDocument()); |
+ } |
+ MarkupFormatter::appendAttributeValue(result, m_rewriteURLs.get(completeURL), document().isHTMLDocument()); |
+ result.appendLiteral("\""); |
+ return; |
+ } |
+ } |
+ MarkupAccumulator::appendAttribute(result, element, attribute, namespaces); |
+} |
+ |
+bool LinkChangeSerializerMarkupAccumulator::shouldIgnoreElement(const Element& element) const |
+{ |
+ return SerializerMarkupAccumulator::shouldIgnoreElement(element) || isHTMLBaseElement(element); |
philipj_slow
2015/06/26 09:07:56
Instead of overriding this, can't LinkChangeSerial
Tiger (Sony Mobile)
2015/06/30 11:31:52
True! Fixing.
philipj_slow
2015/06/30 11:56:27
With that, I think you can also undo the other cha
Tiger (Sony Mobile)
2015/06/30 12:59:02
Done.
|
+} |
+ |
+ |
PageSerializer::PageSerializer(Vector<SerializedResource>* resources, PassOwnPtr<Delegate> delegate) |
: m_resources(resources) |
, m_blankFrameCounter(0) |
@@ -241,9 +336,17 @@ void PageSerializer::serializeFrame(LocalFrame* frame) |
} |
WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes; |
- SerializerMarkupAccumulator accumulator(this, document, serializedNodes); |
- String text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); |
- CString frameHTML = document.encoding().normalizeAndEncode(text, WTF::EntitiesForUnencodables); |
+ String text; |
+ if (!m_rewriteURLs.isEmpty()) { |
+ LinkChangeSerializerMarkupAccumulator accumulator(this, document, serializedNodes, m_rewriteURLs, m_rewriteFolder); |
+ text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); |
+ } else { |
+ SerializerMarkupAccumulator accumulator(this, document, serializedNodes); |
+ text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode); |
+ } |
+ |
+ WTF::TextEncoding textEncoding(document.charset()); |
philipj_slow
2015/06/26 09:07:55
document.encoding() is already a WTF::TextEncoding
Tiger (Sony Mobile)
2015/06/30 11:31:52
Done.
|
+ CString frameHTML = textEncoding.normalizeAndEncode(text, WTF::EntitiesForUnencodables); |
m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length()))); |
m_resourceURLs.add(url); |
@@ -412,6 +515,16 @@ void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document& |
} |
} |
+void PageSerializer::registerRewriteURL(const String& from, const String& to) |
+{ |
+ m_rewriteURLs.set(from, to); |
+} |
+ |
+void PageSerializer::setRewriteURLFolder(const String& rewriteFolder) |
+{ |
+ m_rewriteFolder = rewriteFolder; |
+} |
+ |
KURL PageSerializer::urlForBlankFrame(LocalFrame* frame) |
{ |
BlankFrameURLMap::iterator iter = m_blankFrameURLs.find(frame); |