OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights
reserved. | |
3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without | |
6 * modification, are permitted provided that the following conditions | |
7 * are met: | |
8 * 1. Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * 2. Redistributions in binary form must reproduce the above copyright | |
11 * notice, this list of conditions and the following disclaimer in the | |
12 * documentation and/or other materials provided with the distribution. | |
13 * | |
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
25 */ | |
26 | |
27 #include "config.h" | |
28 #include "core/editing/MarkupFormatter.h" | |
29 | |
30 #include "core/HTMLNames.h" | |
31 #include "core/XLinkNames.h" | |
32 #include "core/XMLNSNames.h" | |
33 #include "core/XMLNames.h" | |
34 #include "core/dom/CDATASection.h" | |
35 #include "core/dom/Comment.h" | |
36 #include "core/dom/Document.h" | |
37 #include "core/dom/DocumentFragment.h" | |
38 #include "core/dom/DocumentType.h" | |
39 #include "core/dom/ProcessingInstruction.h" | |
40 #include "core/editing/Editor.h" | |
41 #include "core/html/HTMLElement.h" | |
42 #include "core/html/HTMLTemplateElement.h" | |
43 #include "platform/weborigin/KURL.h" | |
44 #include "wtf/text/CharacterNames.h" | |
45 | |
46 namespace blink { | |
47 | |
48 using namespace HTMLNames; | |
49 | |
50 struct EntityDescription { | |
51 UChar entity; | |
52 const CString& reference; | |
53 EntityMask mask; | |
54 }; | |
55 | |
56 template <typename CharType> | |
57 static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& resu
lt, CharType* text, unsigned length, const EntityDescription entityMaps[], unsig
ned entityMapsCount, EntityMask entityMask) | |
58 { | |
59 unsigned positionAfterLastEntity = 0; | |
60 for (unsigned i = 0; i < length; ++i) { | |
61 for (unsigned entityIndex = 0; entityIndex < entityMapsCount; ++entityIn
dex) { | |
62 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIn
dex].mask & entityMask) { | |
63 result.append(text + positionAfterLastEntity, i - positionAfterL
astEntity); | |
64 const CString& replacement = entityMaps[entityIndex].reference; | |
65 result.append(replacement.data(), replacement.length()); | |
66 positionAfterLastEntity = i + 1; | |
67 break; | |
68 } | |
69 } | |
70 } | |
71 result.append(text + positionAfterLastEntity, length - positionAfterLastEnti
ty); | |
72 } | |
73 | |
74 void MarkupFormatter::appendCharactersReplacingEntities(StringBuilder& result, c
onst String& source, unsigned offset, unsigned length, EntityMask entityMask) | |
75 { | |
76 DEFINE_STATIC_LOCAL(const CString, ampReference, ("&")); | |
77 DEFINE_STATIC_LOCAL(const CString, ltReference, ("<")); | |
78 DEFINE_STATIC_LOCAL(const CString, gtReference, (">")); | |
79 DEFINE_STATIC_LOCAL(const CString, quotReference, (""")); | |
80 DEFINE_STATIC_LOCAL(const CString, nbspReference, (" ")); | |
81 | |
82 static const EntityDescription entityMaps[] = { | |
83 { '&', ampReference, EntityAmp }, | |
84 { '<', ltReference, EntityLt }, | |
85 { '>', gtReference, EntityGt }, | |
86 { '"', quotReference, EntityQuot }, | |
87 { noBreakSpaceCharacter, nbspReference, EntityNbsp }, | |
88 }; | |
89 | |
90 if (!(offset + length)) | |
91 return; | |
92 | |
93 ASSERT(offset + length <= source.length()); | |
94 if (source.is8Bit()) | |
95 appendCharactersReplacingEntitiesInternal(result, source.characters8() +
offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); | |
96 else | |
97 appendCharactersReplacingEntitiesInternal(result, source.characters16()
+ offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); | |
98 } | |
99 | |
100 MarkupFormatter::MarkupFormatter(EAbsoluteURLs resolveUrlsMethod, SerializationT
ype serializationType) | |
101 : m_resolveURLsMethod(resolveUrlsMethod) | |
102 , m_serializationType(serializationType) | |
103 { | |
104 } | |
105 | |
106 MarkupFormatter::~MarkupFormatter() | |
107 { | |
108 } | |
109 | |
110 String MarkupFormatter::resolveURLIfNeeded(const Element& element, const String&
urlString) const | |
111 { | |
112 switch (m_resolveURLsMethod) { | |
113 case ResolveAllURLs: | |
114 return element.document().completeURL(urlString).string(); | |
115 | |
116 case ResolveNonLocalURLs: | |
117 if (!element.document().url().isLocalFile()) | |
118 return element.document().completeURL(urlString).string(); | |
119 break; | |
120 | |
121 case DoNotResolveURLs: | |
122 break; | |
123 } | |
124 return urlString; | |
125 } | |
126 | |
127 void MarkupFormatter::appendStartMarkup(StringBuilder& result, const Node& node,
Namespaces* namespaces) | |
128 { | |
129 switch (node.nodeType()) { | |
130 case Node::TEXT_NODE: | |
131 ASSERT_NOT_REACHED(); | |
132 break; | |
133 case Node::COMMENT_NODE: | |
134 appendComment(result, toComment(node).data()); | |
135 break; | |
136 case Node::DOCUMENT_NODE: | |
137 appendXMLDeclaration(result, toDocument(node)); | |
138 break; | |
139 case Node::DOCUMENT_FRAGMENT_NODE: | |
140 break; | |
141 case Node::DOCUMENT_TYPE_NODE: | |
142 appendDocumentType(result, toDocumentType(node)); | |
143 break; | |
144 case Node::PROCESSING_INSTRUCTION_NODE: | |
145 appendProcessingInstruction(result, toProcessingInstruction(node).target
(), toProcessingInstruction(node).data()); | |
146 break; | |
147 case Node::ELEMENT_NODE: | |
148 ASSERT_NOT_REACHED(); | |
149 break; | |
150 case Node::CDATA_SECTION_NODE: | |
151 appendCDATASection(result, toCDATASection(node).data()); | |
152 break; | |
153 case Node::ATTRIBUTE_NODE: | |
154 ASSERT_NOT_REACHED(); | |
155 break; | |
156 } | |
157 } | |
158 | |
159 static bool elementCannotHaveEndTag(const Node& node) | |
160 { | |
161 if (!node.isHTMLElement()) | |
162 return false; | |
163 | |
164 // FIXME: ieForbidsInsertHTML may not be the right function to call here | |
165 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML | |
166 // or createContextualFragment. It does not necessarily align with | |
167 // which elements should be serialized w/o end tags. | |
168 return toHTMLElement(node).ieForbidsInsertHTML(); | |
169 } | |
170 | |
171 void MarkupFormatter::appendEndMarkup(StringBuilder& result, const Element& elem
ent) | |
172 { | |
173 if (shouldSelfClose(element) || (!element.hasChildren() && elementCannotHave
EndTag(element))) | |
174 return; | |
175 | |
176 result.appendLiteral("</"); | |
177 result.append(element.tagQName().toString()); | |
178 result.append('>'); | |
179 } | |
180 | |
181 void MarkupFormatter::appendAttributeValue(StringBuilder& result, const String&
attribute, bool documentIsHTML) | |
182 { | |
183 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), | |
184 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeV
alue); | |
185 } | |
186 | |
187 void MarkupFormatter::appendQuotedURLAttributeValue(StringBuilder& result, const
Element& element, const Attribute& attribute) | |
188 { | |
189 ASSERT(element.isURLAttribute(attribute)); | |
190 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value
()); | |
191 UChar quoteChar = '"'; | |
192 String strippedURLString = resolvedURLString.stripWhiteSpace(); | |
193 if (protocolIsJavaScript(strippedURLString)) { | |
194 // minimal escaping for javascript urls | |
195 if (strippedURLString.contains('&')) | |
196 strippedURLString.replaceWithLiteral('&', "&"); | |
197 | |
198 if (strippedURLString.contains('"')) { | |
199 if (strippedURLString.contains('\'')) | |
200 strippedURLString.replaceWithLiteral('"', """); | |
201 else | |
202 quoteChar = '\''; | |
203 } | |
204 result.append(quoteChar); | |
205 result.append(strippedURLString); | |
206 result.append(quoteChar); | |
207 return; | |
208 } | |
209 | |
210 // FIXME: This does not fully match other browsers. Firefox percent-escapes
non-ASCII characters for innerHTML. | |
211 result.append(quoteChar); | |
212 appendAttributeValue(result, resolvedURLString, false); | |
213 result.append(quoteChar); | |
214 } | |
215 | |
216 void MarkupFormatter::appendNamespace(StringBuilder& result, const AtomicString&
prefix, const AtomicString& namespaceURI, Namespaces& namespaces) | |
217 { | |
218 if (namespaceURI.isEmpty()) | |
219 return; | |
220 | |
221 const AtomicString& lookupKey = (!prefix) ? emptyAtom : prefix; | |
222 AtomicString foundURI = namespaces.get(lookupKey); | |
223 if (foundURI != namespaceURI) { | |
224 namespaces.set(lookupKey, namespaceURI); | |
225 result.append(' '); | |
226 result.append(xmlnsAtom.string()); | |
227 if (!prefix.isEmpty()) { | |
228 result.append(':'); | |
229 result.append(prefix); | |
230 } | |
231 | |
232 result.appendLiteral("=\""); | |
233 appendAttributeValue(result, namespaceURI, false); | |
234 result.append('"'); | |
235 } | |
236 } | |
237 | |
238 void MarkupFormatter::appendText(StringBuilder& result, Text& text) | |
239 { | |
240 const String& str = text.data(); | |
241 appendCharactersReplacingEntities(result, str, 0, str.length(), entityMaskFo
rText(text)); | |
242 } | |
243 | |
244 void MarkupFormatter::appendComment(StringBuilder& result, const String& comment
) | |
245 { | |
246 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly ot
her callers) should raise an exception if it includes "-->". | |
247 result.appendLiteral("<!--"); | |
248 result.append(comment); | |
249 result.appendLiteral("-->"); | |
250 } | |
251 | |
252 void MarkupFormatter::appendXMLDeclaration(StringBuilder& result, const Document
& document) | |
253 { | |
254 if (!document.hasXMLDeclaration()) | |
255 return; | |
256 | |
257 result.appendLiteral("<?xml version=\""); | |
258 result.append(document.xmlVersion()); | |
259 const String& encoding = document.xmlEncoding(); | |
260 if (!encoding.isEmpty()) { | |
261 result.appendLiteral("\" encoding=\""); | |
262 result.append(encoding); | |
263 } | |
264 if (document.xmlStandaloneStatus() != Document::StandaloneUnspecified) { | |
265 result.appendLiteral("\" standalone=\""); | |
266 if (document.xmlStandalone()) | |
267 result.appendLiteral("yes"); | |
268 else | |
269 result.appendLiteral("no"); | |
270 } | |
271 | |
272 result.appendLiteral("\"?>"); | |
273 } | |
274 | |
275 void MarkupFormatter::appendDocumentType(StringBuilder& result, const DocumentTy
pe& n) | |
276 { | |
277 if (n.name().isEmpty()) | |
278 return; | |
279 | |
280 result.appendLiteral("<!DOCTYPE "); | |
281 result.append(n.name()); | |
282 if (!n.publicId().isEmpty()) { | |
283 result.appendLiteral(" PUBLIC \""); | |
284 result.append(n.publicId()); | |
285 result.append('"'); | |
286 if (!n.systemId().isEmpty()) { | |
287 result.appendLiteral(" \""); | |
288 result.append(n.systemId()); | |
289 result.append('"'); | |
290 } | |
291 } else if (!n.systemId().isEmpty()) { | |
292 result.appendLiteral(" SYSTEM \""); | |
293 result.append(n.systemId()); | |
294 result.append('"'); | |
295 } | |
296 result.append('>'); | |
297 } | |
298 | |
299 void MarkupFormatter::appendProcessingInstruction(StringBuilder& result, const S
tring& target, const String& data) | |
300 { | |
301 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other call
ers) this should raise an exception if it includes "?>". | |
302 result.appendLiteral("<?"); | |
303 result.append(target); | |
304 result.append(' '); | |
305 result.append(data); | |
306 result.appendLiteral("?>"); | |
307 } | |
308 | |
309 void MarkupFormatter::appendOpenTag(StringBuilder& result, const Element& elemen
t, Namespaces* namespaces) | |
310 { | |
311 result.append('<'); | |
312 result.append(element.tagQName().toString()); | |
313 if (!serializeAsHTMLDocument(element) && namespaces && shouldAddNamespaceEle
ment(element, *namespaces)) | |
314 appendNamespace(result, element.prefix(), element.namespaceURI(), *names
paces); | |
315 } | |
316 | |
317 void MarkupFormatter::appendCloseTag(StringBuilder& result, const Element& eleme
nt) | |
318 { | |
319 if (shouldSelfClose(element)) { | |
320 if (element.isHTMLElement()) | |
321 result.append(' '); // XHTML 1.0 <-> HTML compatibility. | |
322 result.append('/'); | |
323 } | |
324 result.append('>'); | |
325 } | |
326 | |
327 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) | |
328 { | |
329 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI | |
330 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI | |
331 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; | |
332 } | |
333 | |
334 void MarkupFormatter::appendAttribute(StringBuilder& result, const Element& elem
ent, const Attribute& attribute, Namespaces* namespaces) | |
335 { | |
336 bool documentIsHTML = serializeAsHTMLDocument(element); | |
337 | |
338 QualifiedName prefixedName = attribute.name(); | |
339 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) { | |
340 result.append(' '); | |
341 result.append(attribute.name().localName()); | |
342 } else { | |
343 if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { | |
344 if (!attribute.prefix() && attribute.localName() != xmlnsAtom) | |
345 prefixedName.setPrefix(xmlnsAtom); | |
346 if (namespaces) { // Account for the namespace attribute we're about
to append. | |
347 const AtomicString& lookupKey = (!attribute.prefix()) ? emptyAto
m : attribute.localName(); | |
348 namespaces->set(lookupKey, attribute.value()); | |
349 } | |
350 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { | |
351 if (!attribute.prefix()) | |
352 prefixedName.setPrefix(xmlAtom); | |
353 } else { | |
354 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { | |
355 if (!attribute.prefix()) | |
356 prefixedName.setPrefix(xlinkAtom); | |
357 } | |
358 | |
359 if (namespaces && shouldAddNamespaceAttribute(attribute, element)) { | |
360 if (!prefixedName.prefix()) { | |
361 // This behavior is in process of being standardized. See cr
bug.com/248044 and https://www.w3.org/Bugs/Public/show_bug.cgi?id=24208 | |
362 String prefixPrefix("ns", 2); | |
363 for (unsigned i = attribute.namespaceURI().impl()->existingH
ash(); ; ++i) { | |
364 AtomicString newPrefix(String(prefixPrefix + String::num
ber(i))); | |
365 AtomicString foundURI = namespaces->get(newPrefix); | |
366 if (foundURI == attribute.namespaceURI() || foundURI ==
nullAtom) { | |
367 // We already generated a prefix for this namespace. | |
368 prefixedName.setPrefix(newPrefix); | |
369 break; | |
370 } | |
371 } | |
372 } | |
373 ASSERT(prefixedName.prefix()); | |
374 appendNamespace(result, prefixedName.prefix(), attribute.namespa
ceURI(), *namespaces); | |
375 } | |
376 } | |
377 result.append(' '); | |
378 result.append(prefixedName.toString()); | |
379 } | |
380 | |
381 result.append('='); | |
382 | |
383 if (element.isURLAttribute(attribute)) { | |
384 appendQuotedURLAttributeValue(result, element, attribute); | |
385 } else { | |
386 result.append('"'); | |
387 appendAttributeValue(result, attribute.value(), documentIsHTML); | |
388 result.append('"'); | |
389 } | |
390 } | |
391 | |
392 void MarkupFormatter::appendCDATASection(StringBuilder& result, const String& se
ction) | |
393 { | |
394 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly othe
r callers) should raise an exception if it includes "]]>". | |
395 result.appendLiteral("<![CDATA["); | |
396 result.append(section); | |
397 result.appendLiteral("]]>"); | |
398 } | |
399 | |
400 bool MarkupFormatter::shouldAddNamespaceElement(const Element& element, Namespac
es& namespaces) const | |
401 { | |
402 // Don't add namespace attribute if it is already defined for this elem. | |
403 const AtomicString& prefix = element.prefix(); | |
404 if (prefix.isEmpty()) { | |
405 if (element.hasAttribute(xmlnsAtom)) { | |
406 namespaces.set(emptyAtom, element.namespaceURI()); | |
407 return false; | |
408 } | |
409 return true; | |
410 } | |
411 | |
412 return !element.hasAttribute(WTF::xmlnsWithColon + prefix); | |
413 } | |
414 | |
415 bool MarkupFormatter::shouldAddNamespaceAttribute(const Attribute& attribute, co
nst Element& element) const | |
416 { | |
417 // xmlns and xmlns:prefix attributes should be handled by another branch in
appendAttribute. | |
418 ASSERT(attribute.namespaceURI() != XMLNSNames::xmlnsNamespaceURI); | |
419 | |
420 // Attributes are in the null namespace by default. | |
421 if (!attribute.namespaceURI()) | |
422 return false; | |
423 | |
424 // Attributes without a prefix will need one generated for them, and an xmln
s attribute for that prefix. | |
425 if (!attribute.prefix()) | |
426 return true; | |
427 | |
428 return !element.hasAttribute(WTF::xmlnsWithColon + attribute.prefix()); | |
429 } | |
430 | |
431 EntityMask MarkupFormatter::entityMaskForText(const Text& text) const | |
432 { | |
433 if (!serializeAsHTMLDocument(text)) | |
434 return EntityMaskInPCDATA; | |
435 | |
436 // TODO(hajimehoshi): We need to switch EditingStrategy. | |
437 const QualifiedName* parentName = nullptr; | |
438 if (text.parentElement()) | |
439 parentName = &(text.parentElement())->tagQName(); | |
440 | |
441 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *p
arentName == xmpTag)) | |
442 return EntityMaskInCDATA; | |
443 return EntityMaskInHTMLPCDATA; | |
444 } | |
445 | |
446 // Rules of self-closure | |
447 // 1. No elements in HTML documents use the self-closing syntax. | |
448 // 2. Elements w/ children never self-close because they use a separate end tag. | |
449 // 3. HTML elements which do not have a "forbidden" end tag will close with a se
parate end tag. | |
450 // 4. Other elements self-close. | |
451 bool MarkupFormatter::shouldSelfClose(const Element& element) const | |
452 { | |
453 if (serializeAsHTMLDocument(element)) | |
454 return false; | |
455 if (element.hasChildren()) | |
456 return false; | |
457 if (element.isHTMLElement() && !elementCannotHaveEndTag(element)) | |
458 return false; | |
459 return true; | |
460 } | |
461 | |
462 bool MarkupFormatter::serializeAsHTMLDocument(const Node& node) const | |
463 { | |
464 if (m_serializationType == SerializationType::ForcedXML) | |
465 return false; | |
466 return node.document().isHTMLDocument(); | |
467 } | |
468 | |
469 } | |
OLD | NEW |