...

Source file src/html/template/js.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"encoding/json"
    10		"fmt"
    11		"reflect"
    12		"strings"
    13		"unicode/utf8"
    14	)
    15	
    16	// nextJSCtx returns the context that determines whether a slash after the
    17	// given run of tokens starts a regular expression instead of a division
    18	// operator: / or /=.
    19	//
    20	// This assumes that the token run does not include any string tokens, comment
    21	// tokens, regular expression literal tokens, or division operators.
    22	//
    23	// This fails on some valid but nonsensical JavaScript programs like
    24	// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
    25	// fail on any known useful programs. It is based on the draft
    26	// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
    27	// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
    28	func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
    29		s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
    30		if len(s) == 0 {
    31			return preceding
    32		}
    33	
    34		// All cases below are in the single-byte UTF-8 group.
    35		switch c, n := s[len(s)-1], len(s); c {
    36		case '+', '-':
    37			// ++ and -- are not regexp preceders, but + and - are whether
    38			// they are used as infix or prefix operators.
    39			start := n - 1
    40			// Count the number of adjacent dashes or pluses.
    41			for start > 0 && s[start-1] == c {
    42				start--
    43			}
    44			if (n-start)&1 == 1 {
    45				// Reached for trailing minus signs since "---" is the
    46				// same as "-- -".
    47				return jsCtxRegexp
    48			}
    49			return jsCtxDivOp
    50		case '.':
    51			// Handle "42."
    52			if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
    53				return jsCtxDivOp
    54			}
    55			return jsCtxRegexp
    56		// Suffixes for all punctuators from section 7.7 of the language spec
    57		// that only end binary operators not handled above.
    58		case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
    59			return jsCtxRegexp
    60		// Suffixes for all punctuators from section 7.7 of the language spec
    61		// that are prefix operators not handled above.
    62		case '!', '~':
    63			return jsCtxRegexp
    64		// Matches all the punctuators from section 7.7 of the language spec
    65		// that are open brackets not handled above.
    66		case '(', '[':
    67			return jsCtxRegexp
    68		// Matches all the punctuators from section 7.7 of the language spec
    69		// that precede expression starts.
    70		case ':', ';', '{':
    71			return jsCtxRegexp
    72		// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
    73		// are handled in the default except for '}' which can precede a
    74		// division op as in
    75		//    ({ valueOf: function () { return 42 } } / 2
    76		// which is valid, but, in practice, developers don't divide object
    77		// literals, so our heuristic works well for code like
    78		//    function () { ... }  /foo/.test(x) && sideEffect();
    79		// The ')' punctuator can precede a regular expression as in
    80		//     if (b) /foo/.test(x) && ...
    81		// but this is much less likely than
    82		//     (a + b) / c
    83		case '}':
    84			return jsCtxRegexp
    85		default:
    86			// Look for an IdentifierName and see if it is a keyword that
    87			// can precede a regular expression.
    88			j := n
    89			for j > 0 && isJSIdentPart(rune(s[j-1])) {
    90				j--
    91			}
    92			if regexpPrecederKeywords[string(s[j:])] {
    93				return jsCtxRegexp
    94			}
    95		}
    96		// Otherwise is a punctuator not listed above, or
    97		// a string which precedes a div op, or an identifier
    98		// which precedes a div op.
    99		return jsCtxDivOp
   100	}
   101	
   102	// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
   103	// regular expression in JS source.
   104	var regexpPrecederKeywords = map[string]bool{
   105		"break":      true,
   106		"case":       true,
   107		"continue":   true,
   108		"delete":     true,
   109		"do":         true,
   110		"else":       true,
   111		"finally":    true,
   112		"in":         true,
   113		"instanceof": true,
   114		"return":     true,
   115		"throw":      true,
   116		"try":        true,
   117		"typeof":     true,
   118		"void":       true,
   119	}
   120	
   121	var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
   122	
   123	// indirectToJSONMarshaler returns the value, after dereferencing as many times
   124	// as necessary to reach the base type (or nil) or an implementation of json.Marshal.
   125	func indirectToJSONMarshaler(a interface{}) interface{} {
   126		// text/template now supports passing untyped nil as a func call
   127		// argument, so we must support it. Otherwise we'd panic below, as one
   128		// cannot call the Type or Interface methods on an invalid
   129		// reflect.Value. See golang.org/issue/18716.
   130		if a == nil {
   131			return nil
   132		}
   133	
   134		v := reflect.ValueOf(a)
   135		for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
   136			v = v.Elem()
   137		}
   138		return v.Interface()
   139	}
   140	
   141	// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
   142	// neither side-effects nor free variables outside (NaN, Infinity).
   143	func jsValEscaper(args ...interface{}) string {
   144		var a interface{}
   145		if len(args) == 1 {
   146			a = indirectToJSONMarshaler(args[0])
   147			switch t := a.(type) {
   148			case JS:
   149				return string(t)
   150			case JSStr:
   151				// TODO: normalize quotes.
   152				return `"` + string(t) + `"`
   153			case json.Marshaler:
   154				// Do not treat as a Stringer.
   155			case fmt.Stringer:
   156				a = t.String()
   157			}
   158		} else {
   159			for i, arg := range args {
   160				args[i] = indirectToJSONMarshaler(arg)
   161			}
   162			a = fmt.Sprint(args...)
   163		}
   164		// TODO: detect cycles before calling Marshal which loops infinitely on
   165		// cyclic data. This may be an unacceptable DoS risk.
   166	
   167		b, err := json.Marshal(a)
   168		if err != nil {
   169			// Put a space before comment so that if it is flush against
   170			// a division operator it is not turned into a line comment:
   171			//     x/{{y}}
   172			// turning into
   173			//     x//* error marshaling y:
   174			//          second line of error message */null
   175			return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
   176		}
   177	
   178		// TODO: maybe post-process output to prevent it from containing
   179		// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
   180		// in case custom marshalers produce output containing those.
   181	
   182		// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
   183		if len(b) == 0 {
   184			// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
   185			// not cause the output `x=y/*z`.
   186			return " null "
   187		}
   188		first, _ := utf8.DecodeRune(b)
   189		last, _ := utf8.DecodeLastRune(b)
   190		var buf strings.Builder
   191		// Prevent IdentifierNames and NumericLiterals from running into
   192		// keywords: in, instanceof, typeof, void
   193		pad := isJSIdentPart(first) || isJSIdentPart(last)
   194		if pad {
   195			buf.WriteByte(' ')
   196		}
   197		written := 0
   198		// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
   199		// so it falls within the subset of JSON which is valid JS.
   200		for i := 0; i < len(b); {
   201			rune, n := utf8.DecodeRune(b[i:])
   202			repl := ""
   203			if rune == 0x2028 {
   204				repl = `\u2028`
   205			} else if rune == 0x2029 {
   206				repl = `\u2029`
   207			}
   208			if repl != "" {
   209				buf.Write(b[written:i])
   210				buf.WriteString(repl)
   211				written = i + n
   212			}
   213			i += n
   214		}
   215		if buf.Len() != 0 {
   216			buf.Write(b[written:])
   217			if pad {
   218				buf.WriteByte(' ')
   219			}
   220			return buf.String()
   221		}
   222		return string(b)
   223	}
   224	
   225	// jsStrEscaper produces a string that can be included between quotes in
   226	// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
   227	// or in an HTML5 event handler attribute such as onclick.
   228	func jsStrEscaper(args ...interface{}) string {
   229		s, t := stringify(args...)
   230		if t == contentTypeJSStr {
   231			return replace(s, jsStrNormReplacementTable)
   232		}
   233		return replace(s, jsStrReplacementTable)
   234	}
   235	
   236	// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
   237	// specials so the result is treated literally when included in a regular
   238	// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
   239	// the literal text of {{.X}} followed by the string "bar".
   240	func jsRegexpEscaper(args ...interface{}) string {
   241		s, _ := stringify(args...)
   242		s = replace(s, jsRegexpReplacementTable)
   243		if s == "" {
   244			// /{{.X}}/ should not produce a line comment when .X == "".
   245			return "(?:)"
   246		}
   247		return s
   248	}
   249	
   250	// replace replaces each rune r of s with replacementTable[r], provided that
   251	// r < len(replacementTable). If replacementTable[r] is the empty string then
   252	// no replacement is made.
   253	// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
   254	// `\u2029`.
   255	func replace(s string, replacementTable []string) string {
   256		var b strings.Builder
   257		r, w, written := rune(0), 0, 0
   258		for i := 0; i < len(s); i += w {
   259			// See comment in htmlEscaper.
   260			r, w = utf8.DecodeRuneInString(s[i:])
   261			var repl string
   262			switch {
   263			case int(r) < len(replacementTable) && replacementTable[r] != "":
   264				repl = replacementTable[r]
   265			case r == '\u2028':
   266				repl = `\u2028`
   267			case r == '\u2029':
   268				repl = `\u2029`
   269			default:
   270				continue
   271			}
   272			if written == 0 {
   273				b.Grow(len(s))
   274			}
   275			b.WriteString(s[written:i])
   276			b.WriteString(repl)
   277			written = i + w
   278		}
   279		if written == 0 {
   280			return s
   281		}
   282		b.WriteString(s[written:])
   283		return b.String()
   284	}
   285	
   286	var jsStrReplacementTable = []string{
   287		0:    `\0`,
   288		'\t': `\t`,
   289		'\n': `\n`,
   290		'\v': `\x0b`, // "\v" == "v" on IE 6.
   291		'\f': `\f`,
   292		'\r': `\r`,
   293		// Encode HTML specials as hex so the output can be embedded
   294		// in HTML attributes without further encoding.
   295		'"':  `\x22`,
   296		'&':  `\x26`,
   297		'\'': `\x27`,
   298		'+':  `\x2b`,
   299		'/':  `\/`,
   300		'<':  `\x3c`,
   301		'>':  `\x3e`,
   302		'\\': `\\`,
   303	}
   304	
   305	// jsStrNormReplacementTable is like jsStrReplacementTable but does not
   306	// overencode existing escapes since this table has no entry for `\`.
   307	var jsStrNormReplacementTable = []string{
   308		0:    `\0`,
   309		'\t': `\t`,
   310		'\n': `\n`,
   311		'\v': `\x0b`, // "\v" == "v" on IE 6.
   312		'\f': `\f`,
   313		'\r': `\r`,
   314		// Encode HTML specials as hex so the output can be embedded
   315		// in HTML attributes without further encoding.
   316		'"':  `\x22`,
   317		'&':  `\x26`,
   318		'\'': `\x27`,
   319		'+':  `\x2b`,
   320		'/':  `\/`,
   321		'<':  `\x3c`,
   322		'>':  `\x3e`,
   323	}
   324	
   325	var jsRegexpReplacementTable = []string{
   326		0:    `\0`,
   327		'\t': `\t`,
   328		'\n': `\n`,
   329		'\v': `\x0b`, // "\v" == "v" on IE 6.
   330		'\f': `\f`,
   331		'\r': `\r`,
   332		// Encode HTML specials as hex so the output can be embedded
   333		// in HTML attributes without further encoding.
   334		'"':  `\x22`,
   335		'$':  `\$`,
   336		'&':  `\x26`,
   337		'\'': `\x27`,
   338		'(':  `\(`,
   339		')':  `\)`,
   340		'*':  `\*`,
   341		'+':  `\x2b`,
   342		'-':  `\-`,
   343		'.':  `\.`,
   344		'/':  `\/`,
   345		'<':  `\x3c`,
   346		'>':  `\x3e`,
   347		'?':  `\?`,
   348		'[':  `\[`,
   349		'\\': `\\`,
   350		']':  `\]`,
   351		'^':  `\^`,
   352		'{':  `\{`,
   353		'|':  `\|`,
   354		'}':  `\}`,
   355	}
   356	
   357	// isJSIdentPart reports whether the given rune is a JS identifier part.
   358	// It does not handle all the non-Latin letters, joiners, and combining marks,
   359	// but it does handle every codepoint that can occur in a numeric literal or
   360	// a keyword.
   361	func isJSIdentPart(r rune) bool {
   362		switch {
   363		case r == '$':
   364			return true
   365		case '0' <= r && r <= '9':
   366			return true
   367		case 'A' <= r && r <= 'Z':
   368			return true
   369		case r == '_':
   370			return true
   371		case 'a' <= r && r <= 'z':
   372			return true
   373		}
   374		return false
   375	}
   376	
   377	// isJSType reports whether the given MIME type should be considered JavaScript.
   378	//
   379	// It is used to determine whether a script tag with a type attribute is a javascript container.
   380	func isJSType(mimeType string) bool {
   381		// per
   382		//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
   383		//   https://tools.ietf.org/html/rfc7231#section-3.1.1
   384		//   https://tools.ietf.org/html/rfc4329#section-3
   385		//   https://www.ietf.org/rfc/rfc4627.txt
   386		mimeType = strings.ToLower(mimeType)
   387		// discard parameters
   388		if i := strings.Index(mimeType, ";"); i >= 0 {
   389			mimeType = mimeType[:i]
   390		}
   391		mimeType = strings.TrimSpace(mimeType)
   392		switch mimeType {
   393		case
   394			"application/ecmascript",
   395			"application/javascript",
   396			"application/json",
   397			"application/ld+json",
   398			"application/x-ecmascript",
   399			"application/x-javascript",
   400			"module",
   401			"text/ecmascript",
   402			"text/javascript",
   403			"text/javascript1.0",
   404			"text/javascript1.1",
   405			"text/javascript1.2",
   406			"text/javascript1.3",
   407			"text/javascript1.4",
   408			"text/javascript1.5",
   409			"text/jscript",
   410			"text/livescript",
   411			"text/x-ecmascript",
   412			"text/x-javascript":
   413			return true
   414		default:
   415			return false
   416		}
   417	}
   418	

View as plain text