Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(420)

Unified Diff: common/data/text/sanitizehtml/sanitize.go

Issue 2873983003: sanitizehtml: disallow tables (Closed)
Patch Set: simplify Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | common/data/text/sanitizehtml/sanitize_test.go » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: common/data/text/sanitizehtml/sanitize.go
diff --git a/common/data/text/sanitizehtml/sanitize.go b/common/data/text/sanitizehtml/sanitize.go
index ce06f04d5d9d107e25e0426e2e60c777c6ab54b8..909484c5e02c6c9cc83bec510173bdc92c3a0843 100644
--- a/common/data/text/sanitizehtml/sanitize.go
+++ b/common/data/text/sanitizehtml/sanitize.go
@@ -16,13 +16,6 @@ import (
"golang.org/x/net/html/atom"
)
-// attrValueSanitizer sanitizes an attribute value.
-type attrValueSanitizer func(string) string
-
-func alwaysSafe(s string) string {
- return s
-}
-
func sanitizeURL(s string) string {
const sanitizedPrefix = "about:invalid#sanitized&reason="
switch u, err := url.Parse(s); {
@@ -42,23 +35,6 @@ func sanitizeURL(s string) string {
}
}
-type attrMap map[string]attrValueSanitizer
-
-var (
- anchorAttrs = attrMap{
- "alt": alwaysSafe,
- "href": sanitizeURL,
- }
- trAttrs = attrMap{
- "rowspan": alwaysSafe,
- "colspan": alwaysSafe,
- }
- tdAttrs = attrMap{
- "rowspan": alwaysSafe,
- "colspan": alwaysSafe,
- }
-)
-
type stringWriter interface {
WriteString(string) (int, error)
}
@@ -75,40 +51,13 @@ func (s *sanitizer) p(safeMarkup string) {
}
}
-// printAttrs sanitizes and prints a whitelist of attributes in el
-func (s *sanitizer) printAttrs(el *html.Node, whitelist attrMap) {
- for _, a := range el.Attr {
- key := strings.ToLower(a.Key)
- if sanitizer, ok := whitelist[key]; a.Namespace == "" && ok {
- s.p(" ")
- s.p(key)
- s.p("=\"")
- s.p(html.EscapeString(sanitizer(a.Val)))
- s.p("\"")
- }
- }
-}
-
-// printElem prints the safe element with a whitelist of attributes.
-// If allowedAttrs is nil, all attributes are omitted.
-//
-// Do not call for unsafe elements.
-func (s *sanitizer) printElem(safeElement *html.Node, allowedAttrs attrMap) {
- tag := safeElement.DataAtom.String()
- s.p("<")
- s.p(tag)
- if allowedAttrs == nil {
- // ignore attributes
- } else {
- s.printAttrs(safeElement, allowedAttrs)
- }
- s.p(">")
-
- s.visitChildren(safeElement)
-
- s.p("</")
- s.p(tag)
- s.p(">")
+// printAttr prints a space and then an HTML attribute node.
+func (s *sanitizer) printAttr(key, value string) {
+ s.p(" ")
+ s.p(key)
+ s.p("=\"")
+ s.p(html.EscapeString(value))
+ s.p("\"")
}
func (s *sanitizer) visit(n *html.Node) {
@@ -130,20 +79,36 @@ func (s *sanitizer) visit(n *html.Node) {
case atom.A:
s.p(`<a rel="noopener" target="_blank"`)
- s.printAttrs(n, anchorAttrs)
+
+ for _, a := range n.Attr {
+ if a.Namespace != "" {
+ continue
+ }
+ switch strings.ToLower(a.Key) {
+ case "href":
+ s.printAttr("href", sanitizeURL(a.Val))
+
+ case "alt":
+ s.printAttr("alt", a.Val)
+ }
+ }
+
s.p(">")
s.visitChildren(n)
s.p("</a>")
- case atom.P, atom.Ol, atom.Ul, atom.Li, atom.Table, atom.Strong, atom.Em:
+ case atom.P, atom.Ol, atom.Ul, atom.Li, atom.Strong, atom.Em:
// print without attributes
- s.printElem(n, nil)
+ tag := n.DataAtom.String()
+ s.p("<")
+ s.p(tag)
+ s.p(">")
- case atom.Tr:
- s.printElem(n, trAttrs)
+ s.visitChildren(n)
- case atom.Td:
- s.printElem(n, tdAttrs)
+ s.p("</")
+ s.p(tag)
+ s.p(">")
default:
// ignore the element, but visit children.
@@ -173,8 +138,6 @@ func (s *sanitizer) visitChildren(n *html.Node) {
// with an innocuous one.
// - alt attribute is allowed
// - ul, ol, li
-// - table
-// - tr, td. Attributes rowspan/colspan are allowed.
//
// Elements <script> and <style> are ignored entirely.
// For all other HTML nodes, Sanitize ignores the node, but visits its children.
« no previous file with comments | « no previous file | common/data/text/sanitizehtml/sanitize_test.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698