| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights
reserved. | |
| 3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without | |
| 6 * modification, are permitted provided that the following conditions | |
| 7 * are met: | |
| 8 * 1. Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 * notice, this list of conditions and the following disclaimer in the | |
| 12 * documentation and/or other materials provided with the distribution. | |
| 13 * | |
| 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 */ | |
| 26 | |
| 27 #include "config.h" | |
| 28 #include "core/editing/MarkupFormatter.h" | |
| 29 | |
| 30 #include "core/HTMLNames.h" | |
| 31 #include "core/XLinkNames.h" | |
| 32 #include "core/XMLNSNames.h" | |
| 33 #include "core/XMLNames.h" | |
| 34 #include "core/dom/CDATASection.h" | |
| 35 #include "core/dom/Comment.h" | |
| 36 #include "core/dom/Document.h" | |
| 37 #include "core/dom/DocumentFragment.h" | |
| 38 #include "core/dom/DocumentType.h" | |
| 39 #include "core/dom/ProcessingInstruction.h" | |
| 40 #include "core/editing/Editor.h" | |
| 41 #include "core/html/HTMLElement.h" | |
| 42 #include "core/html/HTMLTemplateElement.h" | |
| 43 #include "platform/weborigin/KURL.h" | |
| 44 #include "wtf/text/CharacterNames.h" | |
| 45 | |
| 46 namespace blink { | |
| 47 | |
| 48 using namespace HTMLNames; | |
| 49 | |
| 50 struct EntityDescription { | |
| 51 UChar entity; | |
| 52 const CString& reference; | |
| 53 EntityMask mask; | |
| 54 }; | |
| 55 | |
| 56 template <typename CharType> | |
| 57 static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& resu
lt, CharType* text, unsigned length, const EntityDescription entityMaps[], unsig
ned entityMapsCount, EntityMask entityMask) | |
| 58 { | |
| 59 unsigned positionAfterLastEntity = 0; | |
| 60 for (unsigned i = 0; i < length; ++i) { | |
| 61 for (unsigned entityIndex = 0; entityIndex < entityMapsCount; ++entityIn
dex) { | |
| 62 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIn
dex].mask & entityMask) { | |
| 63 result.append(text + positionAfterLastEntity, i - positionAfterL
astEntity); | |
| 64 const CString& replacement = entityMaps[entityIndex].reference; | |
| 65 result.append(replacement.data(), replacement.length()); | |
| 66 positionAfterLastEntity = i + 1; | |
| 67 break; | |
| 68 } | |
| 69 } | |
| 70 } | |
| 71 result.append(text + positionAfterLastEntity, length - positionAfterLastEnti
ty); | |
| 72 } | |
| 73 | |
| 74 void MarkupFormatter::appendCharactersReplacingEntities(StringBuilder& result, c
onst String& source, unsigned offset, unsigned length, EntityMask entityMask) | |
| 75 { | |
| 76 DEFINE_STATIC_LOCAL(const CString, ampReference, ("&")); | |
| 77 DEFINE_STATIC_LOCAL(const CString, ltReference, ("<")); | |
| 78 DEFINE_STATIC_LOCAL(const CString, gtReference, (">")); | |
| 79 DEFINE_STATIC_LOCAL(const CString, quotReference, (""")); | |
| 80 DEFINE_STATIC_LOCAL(const CString, nbspReference, (" ")); | |
| 81 | |
| 82 static const EntityDescription entityMaps[] = { | |
| 83 { '&', ampReference, EntityAmp }, | |
| 84 { '<', ltReference, EntityLt }, | |
| 85 { '>', gtReference, EntityGt }, | |
| 86 { '"', quotReference, EntityQuot }, | |
| 87 { noBreakSpaceCharacter, nbspReference, EntityNbsp }, | |
| 88 }; | |
| 89 | |
| 90 if (!(offset + length)) | |
| 91 return; | |
| 92 | |
| 93 ASSERT(offset + length <= source.length()); | |
| 94 if (source.is8Bit()) | |
| 95 appendCharactersReplacingEntitiesInternal(result, source.characters8() +
offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); | |
| 96 else | |
| 97 appendCharactersReplacingEntitiesInternal(result, source.characters16()
+ offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); | |
| 98 } | |
| 99 | |
| 100 MarkupFormatter::MarkupFormatter(EAbsoluteURLs resolveUrlsMethod, SerializationT
ype serializationType) | |
| 101 : m_resolveURLsMethod(resolveUrlsMethod) | |
| 102 , m_serializationType(serializationType) | |
| 103 { | |
| 104 } | |
| 105 | |
| 106 MarkupFormatter::~MarkupFormatter() | |
| 107 { | |
| 108 } | |
| 109 | |
| 110 String MarkupFormatter::resolveURLIfNeeded(const Element& element, const String&
urlString) const | |
| 111 { | |
| 112 switch (m_resolveURLsMethod) { | |
| 113 case ResolveAllURLs: | |
| 114 return element.document().completeURL(urlString).string(); | |
| 115 | |
| 116 case ResolveNonLocalURLs: | |
| 117 if (!element.document().url().isLocalFile()) | |
| 118 return element.document().completeURL(urlString).string(); | |
| 119 break; | |
| 120 | |
| 121 case DoNotResolveURLs: | |
| 122 break; | |
| 123 } | |
| 124 return urlString; | |
| 125 } | |
| 126 | |
| 127 void MarkupFormatter::appendStartMarkup(StringBuilder& result, const Node& node,
Namespaces* namespaces) | |
| 128 { | |
| 129 switch (node.nodeType()) { | |
| 130 case Node::TEXT_NODE: | |
| 131 ASSERT_NOT_REACHED(); | |
| 132 break; | |
| 133 case Node::COMMENT_NODE: | |
| 134 appendComment(result, toComment(node).data()); | |
| 135 break; | |
| 136 case Node::DOCUMENT_NODE: | |
| 137 appendXMLDeclaration(result, toDocument(node)); | |
| 138 break; | |
| 139 case Node::DOCUMENT_FRAGMENT_NODE: | |
| 140 break; | |
| 141 case Node::DOCUMENT_TYPE_NODE: | |
| 142 appendDocumentType(result, toDocumentType(node)); | |
| 143 break; | |
| 144 case Node::PROCESSING_INSTRUCTION_NODE: | |
| 145 appendProcessingInstruction(result, toProcessingInstruction(node).target
(), toProcessingInstruction(node).data()); | |
| 146 break; | |
| 147 case Node::ELEMENT_NODE: | |
| 148 ASSERT_NOT_REACHED(); | |
| 149 break; | |
| 150 case Node::CDATA_SECTION_NODE: | |
| 151 appendCDATASection(result, toCDATASection(node).data()); | |
| 152 break; | |
| 153 case Node::ATTRIBUTE_NODE: | |
| 154 ASSERT_NOT_REACHED(); | |
| 155 break; | |
| 156 } | |
| 157 } | |
| 158 | |
| 159 static bool elementCannotHaveEndTag(const Node& node) | |
| 160 { | |
| 161 if (!node.isHTMLElement()) | |
| 162 return false; | |
| 163 | |
| 164 // FIXME: ieForbidsInsertHTML may not be the right function to call here | |
| 165 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML | |
| 166 // or createContextualFragment. It does not necessarily align with | |
| 167 // which elements should be serialized w/o end tags. | |
| 168 return toHTMLElement(node).ieForbidsInsertHTML(); | |
| 169 } | |
| 170 | |
| 171 void MarkupFormatter::appendEndMarkup(StringBuilder& result, const Element& elem
ent) | |
| 172 { | |
| 173 if (shouldSelfClose(element) || (!element.hasChildren() && elementCannotHave
EndTag(element))) | |
| 174 return; | |
| 175 | |
| 176 result.appendLiteral("</"); | |
| 177 result.append(element.tagQName().toString()); | |
| 178 result.append('>'); | |
| 179 } | |
| 180 | |
| 181 void MarkupFormatter::appendAttributeValue(StringBuilder& result, const String&
attribute, bool documentIsHTML) | |
| 182 { | |
| 183 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), | |
| 184 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeV
alue); | |
| 185 } | |
| 186 | |
| 187 void MarkupFormatter::appendQuotedURLAttributeValue(StringBuilder& result, const
Element& element, const Attribute& attribute) | |
| 188 { | |
| 189 ASSERT(element.isURLAttribute(attribute)); | |
| 190 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value
()); | |
| 191 UChar quoteChar = '"'; | |
| 192 String strippedURLString = resolvedURLString.stripWhiteSpace(); | |
| 193 if (protocolIsJavaScript(strippedURLString)) { | |
| 194 // minimal escaping for javascript urls | |
| 195 if (strippedURLString.contains('&')) | |
| 196 strippedURLString.replaceWithLiteral('&', "&"); | |
| 197 | |
| 198 if (strippedURLString.contains('"')) { | |
| 199 if (strippedURLString.contains('\'')) | |
| 200 strippedURLString.replaceWithLiteral('"', """); | |
| 201 else | |
| 202 quoteChar = '\''; | |
| 203 } | |
| 204 result.append(quoteChar); | |
| 205 result.append(strippedURLString); | |
| 206 result.append(quoteChar); | |
| 207 return; | |
| 208 } | |
| 209 | |
| 210 // FIXME: This does not fully match other browsers. Firefox percent-escapes
non-ASCII characters for innerHTML. | |
| 211 result.append(quoteChar); | |
| 212 appendAttributeValue(result, resolvedURLString, false); | |
| 213 result.append(quoteChar); | |
| 214 } | |
| 215 | |
| 216 void MarkupFormatter::appendNamespace(StringBuilder& result, const AtomicString&
prefix, const AtomicString& namespaceURI, Namespaces& namespaces) | |
| 217 { | |
| 218 if (namespaceURI.isEmpty()) | |
| 219 return; | |
| 220 | |
| 221 const AtomicString& lookupKey = (!prefix) ? emptyAtom : prefix; | |
| 222 AtomicString foundURI = namespaces.get(lookupKey); | |
| 223 if (foundURI != namespaceURI) { | |
| 224 namespaces.set(lookupKey, namespaceURI); | |
| 225 result.append(' '); | |
| 226 result.append(xmlnsAtom.string()); | |
| 227 if (!prefix.isEmpty()) { | |
| 228 result.append(':'); | |
| 229 result.append(prefix); | |
| 230 } | |
| 231 | |
| 232 result.appendLiteral("=\""); | |
| 233 appendAttributeValue(result, namespaceURI, false); | |
| 234 result.append('"'); | |
| 235 } | |
| 236 } | |
| 237 | |
| 238 void MarkupFormatter::appendText(StringBuilder& result, Text& text) | |
| 239 { | |
| 240 const String& str = text.data(); | |
| 241 appendCharactersReplacingEntities(result, str, 0, str.length(), entityMaskFo
rText(text)); | |
| 242 } | |
| 243 | |
| 244 void MarkupFormatter::appendComment(StringBuilder& result, const String& comment
) | |
| 245 { | |
| 246 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly ot
her callers) should raise an exception if it includes "-->". | |
| 247 result.appendLiteral("<!--"); | |
| 248 result.append(comment); | |
| 249 result.appendLiteral("-->"); | |
| 250 } | |
| 251 | |
| 252 void MarkupFormatter::appendXMLDeclaration(StringBuilder& result, const Document
& document) | |
| 253 { | |
| 254 if (!document.hasXMLDeclaration()) | |
| 255 return; | |
| 256 | |
| 257 result.appendLiteral("<?xml version=\""); | |
| 258 result.append(document.xmlVersion()); | |
| 259 const String& encoding = document.xmlEncoding(); | |
| 260 if (!encoding.isEmpty()) { | |
| 261 result.appendLiteral("\" encoding=\""); | |
| 262 result.append(encoding); | |
| 263 } | |
| 264 if (document.xmlStandaloneStatus() != Document::StandaloneUnspecified) { | |
| 265 result.appendLiteral("\" standalone=\""); | |
| 266 if (document.xmlStandalone()) | |
| 267 result.appendLiteral("yes"); | |
| 268 else | |
| 269 result.appendLiteral("no"); | |
| 270 } | |
| 271 | |
| 272 result.appendLiteral("\"?>"); | |
| 273 } | |
| 274 | |
| 275 void MarkupFormatter::appendDocumentType(StringBuilder& result, const DocumentTy
pe& n) | |
| 276 { | |
| 277 if (n.name().isEmpty()) | |
| 278 return; | |
| 279 | |
| 280 result.appendLiteral("<!DOCTYPE "); | |
| 281 result.append(n.name()); | |
| 282 if (!n.publicId().isEmpty()) { | |
| 283 result.appendLiteral(" PUBLIC \""); | |
| 284 result.append(n.publicId()); | |
| 285 result.append('"'); | |
| 286 if (!n.systemId().isEmpty()) { | |
| 287 result.appendLiteral(" \""); | |
| 288 result.append(n.systemId()); | |
| 289 result.append('"'); | |
| 290 } | |
| 291 } else if (!n.systemId().isEmpty()) { | |
| 292 result.appendLiteral(" SYSTEM \""); | |
| 293 result.append(n.systemId()); | |
| 294 result.append('"'); | |
| 295 } | |
| 296 result.append('>'); | |
| 297 } | |
| 298 | |
| 299 void MarkupFormatter::appendProcessingInstruction(StringBuilder& result, const S
tring& target, const String& data) | |
| 300 { | |
| 301 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other call
ers) this should raise an exception if it includes "?>". | |
| 302 result.appendLiteral("<?"); | |
| 303 result.append(target); | |
| 304 result.append(' '); | |
| 305 result.append(data); | |
| 306 result.appendLiteral("?>"); | |
| 307 } | |
| 308 | |
| 309 void MarkupFormatter::appendOpenTag(StringBuilder& result, const Element& elemen
t, Namespaces* namespaces) | |
| 310 { | |
| 311 result.append('<'); | |
| 312 result.append(element.tagQName().toString()); | |
| 313 if (!serializeAsHTMLDocument(element) && namespaces && shouldAddNamespaceEle
ment(element, *namespaces)) | |
| 314 appendNamespace(result, element.prefix(), element.namespaceURI(), *names
paces); | |
| 315 } | |
| 316 | |
| 317 void MarkupFormatter::appendCloseTag(StringBuilder& result, const Element& eleme
nt) | |
| 318 { | |
| 319 if (shouldSelfClose(element)) { | |
| 320 if (element.isHTMLElement()) | |
| 321 result.append(' '); // XHTML 1.0 <-> HTML compatibility. | |
| 322 result.append('/'); | |
| 323 } | |
| 324 result.append('>'); | |
| 325 } | |
| 326 | |
| 327 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) | |
| 328 { | |
| 329 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI | |
| 330 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI | |
| 331 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; | |
| 332 } | |
| 333 | |
| 334 void MarkupFormatter::appendAttribute(StringBuilder& result, const Element& elem
ent, const Attribute& attribute, Namespaces* namespaces) | |
| 335 { | |
| 336 bool documentIsHTML = serializeAsHTMLDocument(element); | |
| 337 | |
| 338 QualifiedName prefixedName = attribute.name(); | |
| 339 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) { | |
| 340 result.append(' '); | |
| 341 result.append(attribute.name().localName()); | |
| 342 } else { | |
| 343 if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { | |
| 344 if (!attribute.prefix() && attribute.localName() != xmlnsAtom) | |
| 345 prefixedName.setPrefix(xmlnsAtom); | |
| 346 if (namespaces) { // Account for the namespace attribute we're about
to append. | |
| 347 const AtomicString& lookupKey = (!attribute.prefix()) ? emptyAto
m : attribute.localName(); | |
| 348 namespaces->set(lookupKey, attribute.value()); | |
| 349 } | |
| 350 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { | |
| 351 if (!attribute.prefix()) | |
| 352 prefixedName.setPrefix(xmlAtom); | |
| 353 } else { | |
| 354 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { | |
| 355 if (!attribute.prefix()) | |
| 356 prefixedName.setPrefix(xlinkAtom); | |
| 357 } | |
| 358 | |
| 359 if (namespaces && shouldAddNamespaceAttribute(attribute, element)) { | |
| 360 if (!prefixedName.prefix()) { | |
| 361 // This behavior is in process of being standardized. See cr
bug.com/248044 and https://www.w3.org/Bugs/Public/show_bug.cgi?id=24208 | |
| 362 String prefixPrefix("ns", 2); | |
| 363 for (unsigned i = attribute.namespaceURI().impl()->existingH
ash(); ; ++i) { | |
| 364 AtomicString newPrefix(String(prefixPrefix + String::num
ber(i))); | |
| 365 AtomicString foundURI = namespaces->get(newPrefix); | |
| 366 if (foundURI == attribute.namespaceURI() || foundURI ==
nullAtom) { | |
| 367 // We already generated a prefix for this namespace. | |
| 368 prefixedName.setPrefix(newPrefix); | |
| 369 break; | |
| 370 } | |
| 371 } | |
| 372 } | |
| 373 ASSERT(prefixedName.prefix()); | |
| 374 appendNamespace(result, prefixedName.prefix(), attribute.namespa
ceURI(), *namespaces); | |
| 375 } | |
| 376 } | |
| 377 result.append(' '); | |
| 378 result.append(prefixedName.toString()); | |
| 379 } | |
| 380 | |
| 381 result.append('='); | |
| 382 | |
| 383 if (element.isURLAttribute(attribute)) { | |
| 384 appendQuotedURLAttributeValue(result, element, attribute); | |
| 385 } else { | |
| 386 result.append('"'); | |
| 387 appendAttributeValue(result, attribute.value(), documentIsHTML); | |
| 388 result.append('"'); | |
| 389 } | |
| 390 } | |
| 391 | |
| 392 void MarkupFormatter::appendCDATASection(StringBuilder& result, const String& se
ction) | |
| 393 { | |
| 394 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly othe
r callers) should raise an exception if it includes "]]>". | |
| 395 result.appendLiteral("<![CDATA["); | |
| 396 result.append(section); | |
| 397 result.appendLiteral("]]>"); | |
| 398 } | |
| 399 | |
| 400 bool MarkupFormatter::shouldAddNamespaceElement(const Element& element, Namespac
es& namespaces) const | |
| 401 { | |
| 402 // Don't add namespace attribute if it is already defined for this elem. | |
| 403 const AtomicString& prefix = element.prefix(); | |
| 404 if (prefix.isEmpty()) { | |
| 405 if (element.hasAttribute(xmlnsAtom)) { | |
| 406 namespaces.set(emptyAtom, element.namespaceURI()); | |
| 407 return false; | |
| 408 } | |
| 409 return true; | |
| 410 } | |
| 411 | |
| 412 return !element.hasAttribute(WTF::xmlnsWithColon + prefix); | |
| 413 } | |
| 414 | |
| 415 bool MarkupFormatter::shouldAddNamespaceAttribute(const Attribute& attribute, co
nst Element& element) const | |
| 416 { | |
| 417 // xmlns and xmlns:prefix attributes should be handled by another branch in
appendAttribute. | |
| 418 ASSERT(attribute.namespaceURI() != XMLNSNames::xmlnsNamespaceURI); | |
| 419 | |
| 420 // Attributes are in the null namespace by default. | |
| 421 if (!attribute.namespaceURI()) | |
| 422 return false; | |
| 423 | |
| 424 // Attributes without a prefix will need one generated for them, and an xmln
s attribute for that prefix. | |
| 425 if (!attribute.prefix()) | |
| 426 return true; | |
| 427 | |
| 428 return !element.hasAttribute(WTF::xmlnsWithColon + attribute.prefix()); | |
| 429 } | |
| 430 | |
| 431 EntityMask MarkupFormatter::entityMaskForText(const Text& text) const | |
| 432 { | |
| 433 if (!serializeAsHTMLDocument(text)) | |
| 434 return EntityMaskInPCDATA; | |
| 435 | |
| 436 // TODO(hajimehoshi): We need to switch EditingStrategy. | |
| 437 const QualifiedName* parentName = nullptr; | |
| 438 if (text.parentElement()) | |
| 439 parentName = &(text.parentElement())->tagQName(); | |
| 440 | |
| 441 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *p
arentName == xmpTag)) | |
| 442 return EntityMaskInCDATA; | |
| 443 return EntityMaskInHTMLPCDATA; | |
| 444 } | |
| 445 | |
| 446 // Rules of self-closure | |
| 447 // 1. No elements in HTML documents use the self-closing syntax. | |
| 448 // 2. Elements w/ children never self-close because they use a separate end tag. | |
| 449 // 3. HTML elements which do not have a "forbidden" end tag will close with a se
parate end tag. | |
| 450 // 4. Other elements self-close. | |
| 451 bool MarkupFormatter::shouldSelfClose(const Element& element) const | |
| 452 { | |
| 453 if (serializeAsHTMLDocument(element)) | |
| 454 return false; | |
| 455 if (element.hasChildren()) | |
| 456 return false; | |
| 457 if (element.isHTMLElement() && !elementCannotHaveEndTag(element)) | |
| 458 return false; | |
| 459 return true; | |
| 460 } | |
| 461 | |
| 462 bool MarkupFormatter::serializeAsHTMLDocument(const Node& node) const | |
| 463 { | |
| 464 if (m_serializationType == SerializationType::ForcedXML) | |
| 465 return false; | |
| 466 return node.document().isHTMLDocument(); | |
| 467 } | |
| 468 | |
| 469 } | |
| OLD | NEW |