OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2009 Google Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 #ifndef WebPageSerializerImpl_h | |
32 #define WebPageSerializerImpl_h | |
33 | |
34 #include "wtf/Forward.h" | |
35 #include "wtf/HashMap.h" | |
36 #include "wtf/Vector.h" | |
37 #include "wtf/text/StringBuilder.h" | |
38 #include "wtf/text/StringHash.h" | |
39 #include "wtf/text/WTFString.h" | |
40 | |
41 #include "public/platform/WebString.h" | |
42 #include "public/platform/WebURL.h" | |
43 #include "public/web/WebPageSerializer.h" | |
44 #include "public/web/WebPageSerializerClient.h" | |
45 #include "web/WebEntities.h" | |
46 | |
47 namespace WTF { | |
48 class TextEncoding; | |
49 } | |
50 | |
51 namespace blink { | |
52 | |
53 class Document; | |
54 class Element; | |
55 class Node; | |
56 class WebLocalFrameImpl; | |
57 | |
58 // Gets HTML data by serializing all frames of the current page, using lists | |
59 // which contain all resource links that have a local copy. The local | |
60 // directory contains all saved auxiliary files, including all sub frames and | |
61 // resources. This class finds all target frames and serializes them to HTML | |
62 // data. We have a data buffer for temporarily saving generated HTML data. We | |
63 // sequentially call WebViewDelegate::SendSerializedHtmlData once the data | |
64 // buffer is full. See the comments of WebViewDelegate::SendSerializedHtmlData | |
65 // for more information. | |
66 class WebPageSerializerImpl { | |
67 STACK_ALLOCATED(); | |
68 public: | |
69 // Performs the serialization. Returns false if no available frame has been | |
70 // serialized; otherwise returns true. | |
71 bool serialize(); | |
72 | |
73 // The parameter frame specifies which frame needs to be serialized. | |
74 // The parameter recursive specifies whether we need to serialize all sub | |
75 // frames of the specified frame or not. | |
76 // The parameter client is a pointer to the WebPageSerializerClient | |
77 // interface, the sink which receives the individual chunks of data to be | |
78 // saved. | |
79 // The parameter links contains the original URLs of all saved links. | |
80 // The parameter localPaths contains the corresponding local file paths of | |
81 // all saved links, matched one by one with the entries of links. | |
82 // The parameter localDirectoryName is the relative path of the directory which | |
83 // contains all saved auxiliary files, including sub frames and resources. | |
84 WebPageSerializerImpl(WebFrame* frame, | |
85 bool recursive, | |
86 WebPageSerializerClient* client, | |
87 const WebVector<WebURL>& links, | |
88 const WebVector<WebString>& localPaths, | |
89 const WebString& localDirectoryName); | |
90 | |
91 private: | |
92 // The specified frame which needs to be serialized. | |
93 RawPtrWillBeMember<WebLocalFrameImpl> m_specifiedWebLocalFrameImpl; | |
94 // Pointer to the WebPageSerializerClient. | |
95 WebPageSerializerClient* m_client; | |
96 // This hash map is used to map the resource URL of an original link to its | |
97 // local file path. | |
98 typedef HashMap<WTF::String, WTF::String> LinkLocalPathMap; | |
99 // m_localLinks includes all pairs of local resource path and corresponding | |
100 // original link. | |
101 LinkLocalPathMap m_localLinks; | |
102 // Data buffer for saving the result of serialized DOM data. | |
103 StringBuilder m_dataBuffer; | |
104 // When m_recursiveSerialization is true, we serialize not only the | |
105 // specified frame but also all sub-frames in the specified frame. | |
106 // Otherwise we only serialize the specified frame, excluding all sub-frames. | |
107 bool m_recursiveSerialization; | |
108 // Flag indicating whether we have collected all frames which need to be | |
109 // serialized or not. | |
110 bool m_framesCollected; | |
111 // Local directory name of all local resource files. | |
112 WTF::String m_localDirectoryName; | |
113 // Vector for saving all frames which need to be serialized. | |
114 WillBeHeapVector<RawPtrWillBeMember<WebLocalFrameImpl>> m_frames; | |
115 | |
116 // Web entities conversion maps. | |
117 WebEntities m_htmlEntities; | |
118 WebEntities m_xmlEntities; | |
119 | |
120 class SerializeDomParam { | |
121 STACK_ALLOCATED(); | |
122 public: | |
123 SerializeDomParam(const KURL&, const WTF::TextEncoding&, Document*, cons
t WTF::String& directoryName); | |
124 | |
125 const KURL& url; | |
126 const WTF::TextEncoding& textEncoding; | |
127 RawPtrWillBeMember<Document> document; | |
128 const WTF::String& directoryName; | |
129 bool isHTMLDocument; // document.isHTMLDocument() | |
130 bool haveSeenDocType; | |
131 bool haveAddedCharsetDeclaration; | |
132 // This meta element needs to be skipped when serializing the DOM. | |
133 RawPtrWillBeMember<const Element> skipMetaElement; | |
134 // Flag indicating whether we are in a script or style tag. | |
135 bool isInScriptOrStyleTag; | |
136 bool haveAddedXMLProcessingDirective; | |
137 // Flag indicates whether we have added additional contents before end t
ag. | |
138 // This flag will be re-assigned in each call of function | |
139 // postActionAfterSerializeOpenTag and it could be changed in function | |
140 // preActionBeforeSerializeEndTag if the function adds new contents into | |
141 // the serialization stream. | |
142 bool haveAddedContentsBeforeEnd; | |
143 }; | |
144 | |
145 // Collect all target frames which need to be serialized. | |
146 void collectTargetFrames(); | |
147 // Before we begin serializing the open tag of an element, we give the target | |
148 // element a chance to do some work prior to adding some additional data. | |
149 WTF::String preActionBeforeSerializeOpenTag(const Element*, | |
150 SerializeDomParam* param, | |
151 bool* needSkip); | |
152 // After we finish serializing the open tag of an element, we give the target | |
153 // element a chance to do some post work to add some additional data. | |
154 WTF::String postActionAfterSerializeOpenTag(const Element*, | |
155 SerializeDomParam* param); | |
156 // Before we begin serializing the end tag of an element, we give the target | |
157 // element a chance to do some work prior to adding some additional data. | |
158 WTF::String preActionBeforeSerializeEndTag(const Element*, | |
159 SerializeDomParam* param, | |
160 bool* needSkip); | |
161 // After we finish serializing the end tag of an element, we give the target | |
162 // element a chance to do some post work to add some additional data. | |
163 WTF::String postActionAfterSerializeEndTag(const Element*, | |
164 SerializeDomParam* param); | |
165 // Save generated HTML content to the data buffer. | |
166 void saveHTMLContentToBuffer(const WTF::String& content, | |
167 SerializeDomParam* param); | |
168 | |
169 enum FlushOption { | |
170 ForceFlush, | |
171 DoNotForceFlush, | |
172 }; | |
173 | |
174 // Flushes the content buffer by encoding and sending the content to the | |
175 // WebPageSerializerClient. Content is not flushed if the buffer is not full | |
176 // unless the FlushOption is ForceFlush. | |
177 void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus s
tatus, | |
178 SerializeDomParam* param, | |
179 FlushOption); | |
180 // Serialize the open tag of a specified element. | |
181 void openTagToString(Element*, | |
182 SerializeDomParam* param); | |
183 // Serialize the end tag of a specified element. | |
184 void endTagToString(Element*, | |
185 SerializeDomParam* param); | |
186 // Build content for a specified node. | |
187 void buildContentForNode(Node*, | |
188 SerializeDomParam* param); | |
189 }; | |
190 | |
191 } // namespace blink | |
192 | |
193 #endif | |
OLD | NEW |