Source file src/html/template/html.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"strings"
    11		"unicode/utf8"
    12	)
    13	
    14	// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
    15	func htmlNospaceEscaper(args ...interface{}) string {
    16		s, t := stringify(args...)
    17		if t == contentTypeHTML {
    18			return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
    19		}
    20		return htmlReplacer(s, htmlNospaceReplacementTable, false)
    21	}
    22	
    23	// attrEscaper escapes for inclusion in quoted attribute values.
    24	func attrEscaper(args ...interface{}) string {
    25		s, t := stringify(args...)
    26		if t == contentTypeHTML {
    27			return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
    28		}
    29		return htmlReplacer(s, htmlReplacementTable, true)
    30	}
    31	
    32	// rcdataEscaper escapes for inclusion in an RCDATA element body.
    33	func rcdataEscaper(args ...interface{}) string {
    34		s, t := stringify(args...)
    35		if t == contentTypeHTML {
    36			return htmlReplacer(s, htmlNormReplacementTable, true)
    37		}
    38		return htmlReplacer(s, htmlReplacementTable, true)
    39	}
    40	
    41	// htmlEscaper escapes for inclusion in HTML text.
    42	func htmlEscaper(args ...interface{}) string {
    43		s, t := stringify(args...)
    44		if t == contentTypeHTML {
    45			return s
    46		}
    47		return htmlReplacer(s, htmlReplacementTable, true)
    48	}
    49	
    50	// htmlReplacementTable contains the runes that need to be escaped
    51	// inside a quoted attribute value or in a text node.
    52	var htmlReplacementTable = []string{
    53		// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
    54		// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
    55		// CHARACTER character to the current attribute's value.
    56		// "
    57		// and similarly
    58		// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
    59		0:    "\uFFFD",
    60		'"':  "&#34;",
    61		'&':  "&amp;",
    62		'\'': "&#39;",
    63		'+':  "&#43;",
    64		'<':  "&lt;",
    65		'>':  "&gt;",
    66	}
    67	
    68	// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
    69	// avoid over-encoding existing entities.
    70	var htmlNormReplacementTable = []string{
    71		0:    "\uFFFD",
    72		'"':  "&#34;",
    73		'\'': "&#39;",
    74		'+':  "&#43;",
    75		'<':  "&lt;",
    76		'>':  "&gt;",
    77	}
    78	
    79	// htmlNospaceReplacementTable contains the runes that need to be escaped
    80	// inside an unquoted attribute value.
    81	// The set of runes escaped is the union of the HTML specials and
    82	// those determined by running the JS below in browsers:
    83	// <div id=d></div>
    84	// <script>(function () {
    85	// var a = [], d = document.getElementById("d"), i, c, s;
    86	// for (i = 0; i < 0x10000; ++i) {
    87	//   c = String.fromCharCode(i);
    88	//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
    89	//   s = d.getElementsByTagName("SPAN")[0];
    90	//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
    91	// }
    92	// document.write(a.join(", "));
    93	// })()</script>
    94	var htmlNospaceReplacementTable = []string{
    95		0:    "&#xfffd;",
    96		'\t': "&#9;",
    97		'\n': "&#10;",
    98		'\v': "&#11;",
    99		'\f': "&#12;",
   100		'\r': "&#13;",
   101		' ':  "&#32;",
   102		'"':  "&#34;",
   103		'&':  "&amp;",
   104		'\'': "&#39;",
   105		'+':  "&#43;",
   106		'<':  "&lt;",
   107		'=':  "&#61;",
   108		'>':  "&gt;",
   109		// A parse error in the attribute value (unquoted) and
   110		// before attribute value states.
   111		// Treated as a quoting character by IE.
   112		'`': "&#96;",
   113	}
   114	
   115	// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
   116	// without '&' to avoid over-encoding existing entities.
   117	var htmlNospaceNormReplacementTable = []string{
   118		0:    "&#xfffd;",
   119		'\t': "&#9;",
   120		'\n': "&#10;",
   121		'\v': "&#11;",
   122		'\f': "&#12;",
   123		'\r': "&#13;",
   124		' ':  "&#32;",
   125		'"':  "&#34;",
   126		'\'': "&#39;",
   127		'+':  "&#43;",
   128		'<':  "&lt;",
   129		'=':  "&#61;",
   130		'>':  "&gt;",
   131		// A parse error in the attribute value (unquoted) and
   132		// before attribute value states.
   133		// Treated as a quoting character by IE.
   134		'`': "&#96;",
   135	}
   136	
   137	// htmlReplacer returns s with runes replaced according to replacementTable
   138	// and when badRunes is true, certain bad runes are allowed through unescaped.
   139	func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
   140		written, b := 0, new(strings.Builder)
   141		r, w := rune(0), 0
   142		for i := 0; i < len(s); i += w {
   143			// Cannot use 'for range s' because we need to preserve the width
   144			// of the runes in the input. If we see a decoding error, the input
   145			// width will not be utf8.Runelen(r) and we will overrun the buffer.
   146			r, w = utf8.DecodeRuneInString(s[i:])
   147			if int(r) < len(replacementTable) {
   148				if repl := replacementTable[r]; len(repl) != 0 {
   149					if written == 0 {
   150						b.Grow(len(s))
   151					}
   152					b.WriteString(s[written:i])
   153					b.WriteString(repl)
   154					written = i + w
   155				}
   156			} else if badRunes {
   157				// No-op.
   158				// IE does not allow these ranges in unquoted attrs.
   159			} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
   160				if written == 0 {
   161					b.Grow(len(s))
   162				}
   163				fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
   164				written = i + w
   165			}
   166		}
   167		if written == 0 {
   168			return s
   169		}
   170		b.WriteString(s[written:])
   171		return b.String()
   172	}
   173	
   174	// stripTags takes a snippet of HTML and returns only the text content.
   175	// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
   176	func stripTags(html string) string {
   177		var b bytes.Buffer
   178		s, c, i, allText := []byte(html), context{}, 0, true
   179		// Using the transition funcs helps us avoid mangling
   180		// `<div title="1>2">` or `I <3 Ponies!`.
   181		for i != len(s) {
   182			if c.delim == delimNone {
   183				st := c.state
   184				// Use RCDATA instead of parsing into JS or CSS styles.
   185				if c.element != elementNone && !isInTag(st) {
   186					st = stateRCDATA
   187				}
   188				d, nread := transitionFunc[st](c, s[i:])
   189				i1 := i + nread
   190				if c.state == stateText || c.state == stateRCDATA {
   191					// Emit text up to the start of the tag or comment.
   192					j := i1
   193					if d.state != c.state {
   194						for j1 := j - 1; j1 >= i; j1-- {
   195							if s[j1] == '<' {
   196								j = j1
   197								break
   198							}
   199						}
   200					}
   201					b.Write(s[i:j])
   202				} else {
   203					allText = false
   204				}
   205				c, i = d, i1
   206				continue
   207			}
   208			i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
   209			if i1 < i {
   210				break
   211			}
   212			if c.delim != delimSpaceOrTagEnd {
   213				// Consume any quote.
   214				i1++
   215			}
   216			c, i = context{state: stateTag, element: c.element}, i1
   217		}
   218		if allText {
   219			return html
   220		} else if c.state == stateText || c.state == stateRCDATA {
   221			b.Write(s[i:])
   222		}
   223		return b.String()
   224	}
   225	
   226	// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
   227	// a known-safe HTML attribute.
   228	func htmlNameFilter(args ...interface{}) string {
   229		s, t := stringify(args...)
   230		if t == contentTypeHTMLAttr {
   231			return s
   232		}
   233		if len(s) == 0 {
   234			// Avoid violation of structure preservation.
   235			// <input checked {{.K}}={{.V}}>.
   236			// Without this, if .K is empty then .V is the value of
   237			// checked, but otherwise .V is the value of the attribute
   238			// named .K.
   239			return filterFailsafe
   240		}
   241		s = strings.ToLower(s)
   242		if t := attrType(s); t != contentTypePlain {
   243			// TODO: Split attr and element name part filters so we can whitelist
   244			// attributes.
   245			return filterFailsafe
   246		}
   247		for _, r := range s {
   248			switch {
   249			case '0' <= r && r <= '9':
   250			case 'a' <= r && r <= 'z':
   251			default:
   252				return filterFailsafe
   253			}
   254		}
   255		return s
   256	}
   257	
   258	// commentEscaper returns the empty string regardless of input.
   259	// Comment content does not correspond to any parsed structure or
   260	// human-readable content, so the simplest and most secure policy is to drop
   261	// content interpolated into comments.
   262	// This approach is equally valid whether or not static comment content is
   263	// removed from the template.
   264	func commentEscaper(args ...interface{}) string {
   265		return ""
   266	}
   267
View as plain text