| Index: common/data/text/sanitizehtml/sanitize.go
|
| diff --git a/common/data/text/sanitizehtml/sanitize.go b/common/data/text/sanitizehtml/sanitize.go
|
| index ce06f04d5d9d107e25e0426e2e60c777c6ab54b8..909484c5e02c6c9cc83bec510173bdc92c3a0843 100644
|
| --- a/common/data/text/sanitizehtml/sanitize.go
|
| +++ b/common/data/text/sanitizehtml/sanitize.go
|
| @@ -16,13 +16,6 @@ import (
|
| "golang.org/x/net/html/atom"
|
| )
|
|
|
| -// attrValueSanitizer sanitizes an attribute value.
|
| -type attrValueSanitizer func(string) string
|
| -
|
| -func alwaysSafe(s string) string {
|
| - return s
|
| -}
|
| -
|
| func sanitizeURL(s string) string {
|
| const sanitizedPrefix = "about:invalid#sanitized&reason="
|
| switch u, err := url.Parse(s); {
|
| @@ -42,23 +35,6 @@ func sanitizeURL(s string) string {
|
| }
|
| }
|
|
|
| -type attrMap map[string]attrValueSanitizer
|
| -
|
| -var (
|
| - anchorAttrs = attrMap{
|
| - "alt": alwaysSafe,
|
| - "href": sanitizeURL,
|
| - }
|
| - trAttrs = attrMap{
|
| - "rowspan": alwaysSafe,
|
| - "colspan": alwaysSafe,
|
| - }
|
| - tdAttrs = attrMap{
|
| - "rowspan": alwaysSafe,
|
| - "colspan": alwaysSafe,
|
| - }
|
| -)
|
| -
|
| type stringWriter interface {
|
| WriteString(string) (int, error)
|
| }
|
| @@ -75,40 +51,13 @@ func (s *sanitizer) p(safeMarkup string) {
|
| }
|
| }
|
|
|
| -// printAttrs sanitizes and prints a whitelist of attributes in el
|
| -func (s *sanitizer) printAttrs(el *html.Node, whitelist attrMap) {
|
| - for _, a := range el.Attr {
|
| - key := strings.ToLower(a.Key)
|
| - if sanitizer, ok := whitelist[key]; a.Namespace == "" && ok {
|
| - s.p(" ")
|
| - s.p(key)
|
| - s.p("=\"")
|
| - s.p(html.EscapeString(sanitizer(a.Val)))
|
| - s.p("\"")
|
| - }
|
| - }
|
| -}
|
| -
|
| -// printElem prints the safe element with a whitelist of attributes.
|
| -// If allowedAttrs is nil, all attributes are omitted.
|
| -//
|
| -// Do not call for unsafe elements.
|
| -func (s *sanitizer) printElem(safeElement *html.Node, allowedAttrs attrMap) {
|
| - tag := safeElement.DataAtom.String()
|
| - s.p("<")
|
| - s.p(tag)
|
| - if allowedAttrs == nil {
|
| - // ignore attributes
|
| - } else {
|
| - s.printAttrs(safeElement, allowedAttrs)
|
| - }
|
| - s.p(">")
|
| -
|
| - s.visitChildren(safeElement)
|
| -
|
| - s.p("</")
|
| - s.p(tag)
|
| - s.p(">")
|
| +// printAttr prints a space and then an HTML attribute node.
|
| +func (s *sanitizer) printAttr(key, value string) {
|
| + s.p(" ")
|
| + s.p(key)
|
| + s.p("=\"")
|
| + s.p(html.EscapeString(value))
|
| + s.p("\"")
|
| }
|
|
|
| func (s *sanitizer) visit(n *html.Node) {
|
| @@ -130,20 +79,36 @@ func (s *sanitizer) visit(n *html.Node) {
|
|
|
| case atom.A:
|
| s.p(`<a rel="noopener" target="_blank"`)
|
| - s.printAttrs(n, anchorAttrs)
|
| +
|
| + for _, a := range n.Attr {
|
| + if a.Namespace != "" {
|
| + continue
|
| + }
|
| + switch strings.ToLower(a.Key) {
|
| + case "href":
|
| + s.printAttr("href", sanitizeURL(a.Val))
|
| +
|
| + case "alt":
|
| + s.printAttr("alt", a.Val)
|
| + }
|
| + }
|
| +
|
| s.p(">")
|
| s.visitChildren(n)
|
| s.p("</a>")
|
|
|
| - case atom.P, atom.Ol, atom.Ul, atom.Li, atom.Table, atom.Strong, atom.Em:
|
| + case atom.P, atom.Ol, atom.Ul, atom.Li, atom.Strong, atom.Em:
|
| // print without attributes
|
| - s.printElem(n, nil)
|
| + tag := n.DataAtom.String()
|
| + s.p("<")
|
| + s.p(tag)
|
| + s.p(">")
|
|
|
| - case atom.Tr:
|
| - s.printElem(n, trAttrs)
|
| + s.visitChildren(n)
|
|
|
| - case atom.Td:
|
| - s.printElem(n, tdAttrs)
|
| + s.p("</")
|
| + s.p(tag)
|
| + s.p(">")
|
|
|
| default:
|
| // ignore the element, but visit children.
|
| @@ -173,8 +138,6 @@ func (s *sanitizer) visitChildren(n *html.Node) {
|
| // with an innocuous one.
|
| // - alt attribute is allowed
|
| // - ul, ol, li
|
| -// - table
|
| -// - tr, td. Attributes rowspan/colspan are allowed.
|
| //
|
| // Elements <script> and <style> are ignored entirely.
|
| // For all other HTML nodes, Sanitize ignores the node, but visits its children.
|
|
|