Source file src/html/template/css.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"strings"
    11		"unicode"
    12		"unicode/utf8"
    13	)
    14	
    15	// endsWithCSSKeyword reports whether b ends with an ident that
    16	// case-insensitively matches the lower-case kw.
    17	func endsWithCSSKeyword(b []byte, kw string) bool {
    18		i := len(b) - len(kw)
    19		if i < 0 {
    20			// Too short.
    21			return false
    22		}
    23		if i != 0 {
    24			r, _ := utf8.DecodeLastRune(b[:i])
    25			if isCSSNmchar(r) {
    26				// Too long.
    27				return false
    28			}
    29		}
    30		// Many CSS keywords, such as "!important" can have characters encoded,
    31		// but the URI production does not allow that according to
    32		// https://www.w3.org/TR/css3-syntax/#TOK-URI
    33		// This does not attempt to recognize encoded keywords. For example,
    34		// given "\75\72\6c" and "url" this return false.
    35		return string(bytes.ToLower(b[i:])) == kw
    36	}
    37	
    38	// isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
    39	func isCSSNmchar(r rune) bool {
    40		// Based on the CSS3 nmchar production but ignores multi-rune escape
    41		// sequences.
    42		// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
    43		return 'a' <= r && r <= 'z' ||
    44			'A' <= r && r <= 'Z' ||
    45			'0' <= r && r <= '9' ||
    46			r == '-' ||
    47			r == '_' ||
    48			// Non-ASCII cases below.
    49			0x80 <= r && r <= 0xd7ff ||
    50			0xe000 <= r && r <= 0xfffd ||
    51			0x10000 <= r && r <= 0x10ffff
    52	}
    53	
    54	// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
    55	// If there is no change, it returns the input, otherwise it returns a slice
    56	// backed by a new array.
    57	// https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
    58	func decodeCSS(s []byte) []byte {
    59		i := bytes.IndexByte(s, '\\')
    60		if i == -1 {
    61			return s
    62		}
    63		// The UTF-8 sequence for a codepoint is never longer than 1 + the
    64		// number hex digits need to represent that codepoint, so len(s) is an
    65		// upper bound on the output length.
    66		b := make([]byte, 0, len(s))
    67		for len(s) != 0 {
    68			i := bytes.IndexByte(s, '\\')
    69			if i == -1 {
    70				i = len(s)
    71			}
    72			b, s = append(b, s[:i]...), s[i:]
    73			if len(s) < 2 {
    74				break
    75			}
    76			// https://www.w3.org/TR/css3-syntax/#SUBTOK-escape
    77			// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
    78			if isHex(s[1]) {
    79				// https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
    80				//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
    81				j := 2
    82				for j < len(s) && j < 7 && isHex(s[j]) {
    83					j++
    84				}
    85				r := hexDecode(s[1:j])
    86				if r > unicode.MaxRune {
    87					r, j = r/16, j-1
    88				}
    89				n := utf8.EncodeRune(b[len(b):cap(b)], r)
    90				// The optional space at the end allows a hex
    91				// sequence to be followed by a literal hex.
    92				// string(decodeCSS([]byte(`\A B`))) == "\nB"
    93				b, s = b[:len(b)+n], skipCSSSpace(s[j:])
    94			} else {
    95				// `\\` decodes to `\` and `\"` to `"`.
    96				_, n := utf8.DecodeRune(s[1:])
    97				b, s = append(b, s[1:1+n]...), s[1+n:]
    98			}
    99		}
   100		return b
   101	}
   102	
   103	// isHex reports whether the given character is a hex digit.
   104	func isHex(c byte) bool {
   105		return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
   106	}
   107	
   108	// hexDecode decodes a short hex digit sequence: "10" -> 16.
   109	func hexDecode(s []byte) rune {
   110		n := '\x00'
   111		for _, c := range s {
   112			n <<= 4
   113			switch {
   114			case '0' <= c && c <= '9':
   115				n |= rune(c - '0')
   116			case 'a' <= c && c <= 'f':
   117				n |= rune(c-'a') + 10
   118			case 'A' <= c && c <= 'F':
   119				n |= rune(c-'A') + 10
   120			default:
   121				panic(fmt.Sprintf("Bad hex digit in %q", s))
   122			}
   123		}
   124		return n
   125	}
   126	
   127	// skipCSSSpace returns a suffix of c, skipping over a single space.
   128	func skipCSSSpace(c []byte) []byte {
   129		if len(c) == 0 {
   130			return c
   131		}
   132		// wc ::= #x9 | #xA | #xC | #xD | #x20
   133		switch c[0] {
   134		case '\t', '\n', '\f', ' ':
   135			return c[1:]
   136		case '\r':
   137			// This differs from CSS3's wc production because it contains a
   138			// probable spec error whereby wc contains all the single byte
   139			// sequences in nl (newline) but not CRLF.
   140			if len(c) >= 2 && c[1] == '\n' {
   141				return c[2:]
   142			}
   143			return c[1:]
   144		}
   145		return c
   146	}
   147	
   148	// isCSSSpace reports whether b is a CSS space char as defined in wc.
   149	func isCSSSpace(b byte) bool {
   150		switch b {
   151		case '\t', '\n', '\f', '\r', ' ':
   152			return true
   153		}
   154		return false
   155	}
   156	
   157	// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
   158	func cssEscaper(args ...interface{}) string {
   159		s, _ := stringify(args...)
   160		var b strings.Builder
   161		r, w, written := rune(0), 0, 0
   162		for i := 0; i < len(s); i += w {
   163			// See comment in htmlEscaper.
   164			r, w = utf8.DecodeRuneInString(s[i:])
   165			var repl string
   166			switch {
   167			case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
   168				repl = cssReplacementTable[r]
   169			default:
   170				continue
   171			}
   172			if written == 0 {
   173				b.Grow(len(s))
   174			}
   175			b.WriteString(s[written:i])
   176			b.WriteString(repl)
   177			written = i + w
   178			if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
   179				b.WriteByte(' ')
   180			}
   181		}
   182		if written == 0 {
   183			return s
   184		}
   185		b.WriteString(s[written:])
   186		return b.String()
   187	}
   188	
   189	var cssReplacementTable = []string{
   190		0:    `\0`,
   191		'\t': `\9`,
   192		'\n': `\a`,
   193		'\f': `\c`,
   194		'\r': `\d`,
   195		// Encode HTML specials as hex so the output can be embedded
   196		// in HTML attributes without further encoding.
   197		'"':  `\22`,
   198		'&':  `\26`,
   199		'\'': `\27`,
   200		'(':  `\28`,
   201		')':  `\29`,
   202		'+':  `\2b`,
   203		'/':  `\2f`,
   204		':':  `\3a`,
   205		';':  `\3b`,
   206		'<':  `\3c`,
   207		'>':  `\3e`,
   208		'\\': `\\`,
   209		'{':  `\7b`,
   210		'}':  `\7d`,
   211	}
   212	
   213	var expressionBytes = []byte("expression")
   214	var mozBindingBytes = []byte("mozbinding")
   215	
   216	// cssValueFilter allows innocuous CSS values in the output including CSS
   217	// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
   218	// (inherit, blue), and colors (#888).
   219	// It filters out unsafe values, such as those that affect token boundaries,
   220	// and anything that might execute scripts.
   221	func cssValueFilter(args ...interface{}) string {
   222		s, t := stringify(args...)
   223		if t == contentTypeCSS {
   224			return s
   225		}
   226		b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
   227	
   228		// CSS3 error handling is specified as honoring string boundaries per
   229		// https://www.w3.org/TR/css3-syntax/#error-handling :
   230		//     Malformed declarations. User agents must handle unexpected
   231		//     tokens encountered while parsing a declaration by reading until
   232		//     the end of the declaration, while observing the rules for
   233		//     matching pairs of (), [], {}, "", and '', and correctly handling
   234		//     escapes. For example, a malformed declaration may be missing a
   235		//     property, colon (:) or value.
   236		// So we need to make sure that values do not have mismatched bracket
   237		// or quote characters to prevent the browser from restarting parsing
   238		// inside a string that might embed JavaScript source.
   239		for i, c := range b {
   240			switch c {
   241			case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
   242				return filterFailsafe
   243			case '-':
   244				// Disallow <!-- or -->.
   245				// -- should not appear in valid identifiers.
   246				if i != 0 && b[i-1] == '-' {
   247					return filterFailsafe
   248				}
   249			default:
   250				if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
   251					id = append(id, c)
   252				}
   253			}
   254		}
   255		id = bytes.ToLower(id)
   256		if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
   257			return filterFailsafe
   258		}
   259		return string(b)
   260	}
   261
View as plain text