OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2009 Google Inc. All rights reserved. |
| 3 * |
| 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are |
| 6 * met: |
| 7 * |
| 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above |
| 11 * copyright notice, this list of conditions and the following disclaimer |
| 12 * in the documentation and/or other materials provided with the |
| 13 * distribution. |
| 14 * * Neither the name of Google Inc. nor the names of its |
| 15 * contributors may be used to endorse or promote products derived from |
| 16 * this software without specific prior written permission. |
| 17 * |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 */ |
| 30 |
| 31 #ifndef WebPageSerializerImpl_h |
| 32 #define WebPageSerializerImpl_h |
| 33 |
| 34 #include "wtf/Forward.h" |
| 35 #include "wtf/HashMap.h" |
| 36 #include "wtf/Vector.h" |
| 37 #include "wtf/text/StringBuilder.h" |
| 38 #include "wtf/text/StringHash.h" |
| 39 #include "wtf/text/WTFString.h" |
| 40 |
| 41 #include "public/platform/WebString.h" |
| 42 #include "public/platform/WebURL.h" |
| 43 #include "public/web/WebPageSerializer.h" |
| 44 #include "public/web/WebPageSerializerClient.h" |
| 45 #include "web/WebEntities.h" |
| 46 |
| 47 namespace WTF { |
| 48 class TextEncoding; |
| 49 } |
| 50 |
| 51 namespace blink { |
| 52 |
| 53 class Document; |
| 54 class Element; |
| 55 class Node; |
| 56 class WebLocalFrameImpl; |
| 57 |
| 58 // Gets HTML data by serializing all frames of the current page, using lists |
| 59 // which contain all resource links that have a local copy and the directory |
| 60 // which contains all saved auxiliary files (sub frames and resources included). |
| 61 // This class finds all frames and serializes them to HTML data. Generated |
| 62 // HTML is temporarily saved in a data buffer, and |
| 63 // WebViewDelegate::SendSerializedHtmlData is called sequentially once the |
| 64 // buffer is full; see its comments for more information. NOTE(review): that |
| 65 // name looks stale -- the sink visible here is WebPageSerializerClient; confirm. |
| 66 class WebPageSerializerImpl { |
| 67 STACK_ALLOCATED(); |
| 68 public: |
| 69 // Performs the serialization. Returns false if no available frame has been |
| 70 // serialized; otherwise returns true. |
| 71 bool serialize(); |
| 72 |
| 73 // The parameter frame specifies which frame needs to be serialized. |
| 74 // The parameter recursive specifies whether we need to serialize all sub |
| 75 // frames of the specified frame or not. |
| 76 // The parameter client is a pointer to the WebPageSerializerClient |
| 77 // interface, the sink interface which can receive the individual chunks |
| 78 // of data to be saved. |
| 79 // The parameter links contains the original URLs of all saved links. |
| 80 // The parameter localPaths contains the corresponding local file paths of |
| 81 // all saved links, matched one by one with the entries of links. |
| 82 // The parameter localDirectoryName is the relative path of the directory |
| 83 // which contains all saved auxiliary files, including all sub frames and resources. |
| 84 WebPageSerializerImpl(WebFrame* frame, |
| 85 bool recursive, |
| 86 WebPageSerializerClient* client, |
| 87 const WebVector<WebURL>& links, |
| 88 const WebVector<WebString>& localPaths, |
| 89 const WebString& localDirectoryName); |
| 90 |
| 91 private: |
| 92 // The specified frame which needs to be serialized. |
| 93 RawPtrWillBeMember<WebLocalFrameImpl> m_specifiedWebLocalFrameImpl; |
| 94 // Pointer to the WebPageSerializerClient. |
| 95 WebPageSerializerClient* m_client; |
| 96 // This hash map type is used to map the resource URL of an original link to |
| 97 // its local file path. |
| 98 typedef HashMap<WTF::String, WTF::String> LinkLocalPathMap; |
| 99 // m_localLinks includes all pairs of local resource path and corresponding |
| 100 // original link. |
| 101 LinkLocalPathMap m_localLinks; |
| 102 // Data buffer holding the result of the serialized DOM data. |
| 103 StringBuilder m_dataBuffer; |
| 104 // When m_recursiveSerialization is true we serialize not only the specified |
| 105 // frame but also all sub-frames in the specified frame. Otherwise we only |
| 106 // serialize the specified frame, excluding all sub-frames. |
| 107 bool m_recursiveSerialization; |
| 108 // Flag indicating whether we have collected all frames which need to be |
| 109 // serialized or not. |
| 110 bool m_framesCollected; |
| 111 // Local directory name of all local resource files. |
| 112 WTF::String m_localDirectoryName; |
| 113 // Vector holding all frames which need to be serialized. |
| 114 WillBeHeapVector<RawPtrWillBeMember<WebLocalFrameImpl>> m_frames; |
| 115 |
| 116 // Web entities conversion maps. |
| 117 WebEntities m_htmlEntities; |
| 118 WebEntities m_xmlEntities; |
| 119 |
| 120 class SerializeDomParam { |
| 121 STACK_ALLOCATED(); |
| 122 public: |
| 123 SerializeDomParam(const KURL&, const WTF::TextEncoding&, Document*, cons
t WTF::String& directoryName); |
| 124 |
| 125 const KURL& url; |
| 126 const WTF::TextEncoding& textEncoding; |
| 127 RawPtrWillBeMember<Document> document; |
| 128 const WTF::String& directoryName; |
| 129 bool isHTMLDocument; // document.isHTMLDocument() |
| 130 bool haveSeenDocType; |
| 131 bool haveAddedCharsetDeclaration; |
| 132 // This meta element needs to be skipped when serializing the DOM. |
| 133 RawPtrWillBeMember<const Element> skipMetaElement; |
| 134 // Flag indicating that we are inside a script or style tag. |
| 135 bool isInScriptOrStyleTag; |
| 136 bool haveAddedXMLProcessingDirective; |
| 137 // Flag indicates whether we have added additional contents before end t
ag. |
| 138 // This flag is re-assigned in each call of the function |
| 139 // postActionAfterSerializeOpenTag and it could be changed in the function |
| 140 // preActionBeforeSerializeEndTag if that function adds new contents into |
| 141 // the serialization stream. |
| 142 bool haveAddedContentsBeforeEnd; |
| 143 }; |
| 144 |
| 145 // Collects all target frames which need to be serialized. |
| 146 void collectTargetFrames(); |
| 147 // Before we begin serializing the open tag of an element, we give the target |
| 148 // element a chance to do some work prior to adding some additional data. |
| 149 WTF::String preActionBeforeSerializeOpenTag(const Element*, |
| 150 SerializeDomParam* param, |
| 151 bool* needSkip); |
| 152 // After we finish serializing the open tag of an element, we give the target |
| 153 // element a chance to do some post work to add some additional data. |
| 154 WTF::String postActionAfterSerializeOpenTag(const Element*, |
| 155 SerializeDomParam* param); |
| 156 // Before we begin serializing the end tag of an element, we give the target |
| 157 // element a chance to do some work prior to adding some additional data. |
| 158 WTF::String preActionBeforeSerializeEndTag(const Element*, |
| 159 SerializeDomParam* param, |
| 160 bool* needSkip); |
| 161 // After we finish serializing the end tag of an element, we give the target |
| 162 // element a chance to do some post work to add some additional data. |
| 163 WTF::String postActionAfterSerializeEndTag(const Element*, |
| 164 SerializeDomParam* param); |
| 165 // Saves the generated HTML content to the data buffer. |
| 166 void saveHTMLContentToBuffer(const WTF::String& content, |
| 167 SerializeDomParam* param); |
| 168 |
| 169 enum FlushOption { |
| 170 ForceFlush, |
| 171 DoNotForceFlush, |
| 172 }; |
| 173 |
| 174 // Flushes the content buffer by encoding and sending the content to the |
| 175 // WebPageSerializerClient. Content is not flushed if the buffer is not full |
| 176 // unless ForceFlush is passed as the FlushOption. |
| 177 void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus s
tatus, |
| 178 SerializeDomParam* param, |
| 179 FlushOption); |
| 180 // Serializes the open tag of a specified element. |
| 181 void openTagToString(Element*, |
| 182 SerializeDomParam* param); |
| 183 // Serializes the end tag of a specified element. |
| 184 void endTagToString(Element*, |
| 185 SerializeDomParam* param); |
| 186 // Builds the content for a specified node. |
| 187 void buildContentForNode(Node*, |
| 188 SerializeDomParam* param); |
| 189 }; |
| 190 |
| 191 } // namespace blink |
| 192 |
| 193 #endif |
OLD | NEW |