...

Source file src/pkg/html/template/escape.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"html"
    11		"io"
    12		"text/template"
    13		"text/template/parse"
    14	)
    15	
    16	// escapeTemplate rewrites the named template, which must be
    17	// associated with t, to guarantee that the output of any of the named
    18	// templates is properly escaped. If no error is returned, then the named templates have
    19	// been modified. Otherwise the named templates have been rendered
    20	// unusable.
    21	func escapeTemplate(tmpl *Template, node parse.Node, name string) error {
    22		c, _ := tmpl.esc.escapeTree(context{}, node, name, 0)
    23		var err error
    24		if c.err != nil {
    25			err, c.err.Name = c.err, name
    26		} else if c.state != stateText {
    27			err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
    28		}
    29		if err != nil {
    30			// Prevent execution of unsafe templates.
    31			if t := tmpl.set[name]; t != nil {
    32				t.escapeErr = err
    33				t.text.Tree = nil
    34				t.Tree = nil
    35			}
    36			return err
    37		}
    38		tmpl.esc.commit()
    39		if t := tmpl.set[name]; t != nil {
    40			t.escapeErr = escapeOK
    41			t.Tree = t.text.Tree
    42		}
    43		return nil
    44	}
    45	
    46	// evalArgs formats the list of arguments into a string. It is equivalent to
    47	// fmt.Sprint(args...), except that it deferences all pointers.
    48	func evalArgs(args ...interface{}) string {
    49		// Optimization for simple common case of a single string argument.
    50		if len(args) == 1 {
    51			if s, ok := args[0].(string); ok {
    52				return s
    53			}
    54		}
    55		for i, arg := range args {
    56			args[i] = indirectToStringerOrError(arg)
    57		}
    58		return fmt.Sprint(args...)
    59	}
    60	
    61	// funcMap maps command names to functions that render their inputs safe.
    62	var funcMap = template.FuncMap{
    63		"_html_template_attrescaper":     attrEscaper,
    64		"_html_template_commentescaper":  commentEscaper,
    65		"_html_template_cssescaper":      cssEscaper,
    66		"_html_template_cssvaluefilter":  cssValueFilter,
    67		"_html_template_htmlnamefilter":  htmlNameFilter,
    68		"_html_template_htmlescaper":     htmlEscaper,
    69		"_html_template_jsregexpescaper": jsRegexpEscaper,
    70		"_html_template_jsstrescaper":    jsStrEscaper,
    71		"_html_template_jsvalescaper":    jsValEscaper,
    72		"_html_template_nospaceescaper":  htmlNospaceEscaper,
    73		"_html_template_rcdataescaper":   rcdataEscaper,
    74		"_html_template_srcsetescaper":   srcsetFilterAndEscaper,
    75		"_html_template_urlescaper":      urlEscaper,
    76		"_html_template_urlfilter":       urlFilter,
    77		"_html_template_urlnormalizer":   urlNormalizer,
    78		"_eval_args_":                    evalArgs,
    79	}
    80	
// escaper collects type inferences about templates and changes needed to make
// templates injection safe. Edits are only recorded here; they are applied to
// the parse trees when commit is called.
type escaper struct {
	// ns is the nameSpace that this escaper is associated with.
	ns *nameSpace
	// output[templateName] is the output context for a templateName that
	// has been mangled to include its input context.
	output map[string]context
	// derived[c.mangle(name)] is the template derived from the template
	// called name for the start context c.
	derived map[string]*template.Template
	// called is the set of mangled template names that have been called.
	called map[string]bool
	// xxxNodeEdits are the accumulated edits to apply during commit.
	// Such edits are not applied immediately in case a template set
	// executes a given template in different escaping contexts.
	actionNodeEdits   map[*parse.ActionNode][]string
	templateNodeEdits map[*parse.TemplateNode]string
	textNodeEdits     map[*parse.TextNode][]byte
}
   101	
   102	// makeEscaper creates a blank escaper for the given set.
   103	func makeEscaper(n *nameSpace) escaper {
   104		return escaper{
   105			n,
   106			map[string]context{},
   107			map[string]*template.Template{},
   108			map[string]bool{},
   109			map[*parse.ActionNode][]string{},
   110			map[*parse.TemplateNode]string{},
   111			map[*parse.TextNode][]byte{},
   112		}
   113	}
   114	
// filterFailsafe is an innocuous word that sanitizer functions emit in place
// of unsafe values. It is not a keyword in any programming language, contains
// no special characters, is not empty, and when it appears in output it is
// distinct enough that a developer can find the source of the problem via a
// search engine.
const filterFailsafe = "ZgotmplZ"
   121	
   122	// escape escapes a template node.
   123	func (e *escaper) escape(c context, n parse.Node) context {
   124		switch n := n.(type) {
   125		case *parse.ActionNode:
   126			return e.escapeAction(c, n)
   127		case *parse.IfNode:
   128			return e.escapeBranch(c, &n.BranchNode, "if")
   129		case *parse.ListNode:
   130			return e.escapeList(c, n)
   131		case *parse.RangeNode:
   132			return e.escapeBranch(c, &n.BranchNode, "range")
   133		case *parse.TemplateNode:
   134			return e.escapeTemplate(c, n)
   135		case *parse.TextNode:
   136			return e.escapeText(c, n)
   137		case *parse.WithNode:
   138			return e.escapeBranch(c, &n.BranchNode, "with")
   139		}
   140		panic("escaping " + n.String() + " is unimplemented")
   141	}
   142	
   143	// escapeAction escapes an action template node.
   144	func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
   145		if len(n.Pipe.Decl) != 0 {
   146			// A local variable assignment, not an interpolation.
   147			return c
   148		}
   149		c = nudge(c)
   150		// Check for disallowed use of predefined escapers in the pipeline.
   151		for pos, idNode := range n.Pipe.Cmds {
   152			node, ok := idNode.Args[0].(*parse.IdentifierNode)
   153			if !ok {
   154				// A predefined escaper "esc" will never be found as an identifier in a
   155				// Chain or Field node, since:
   156				// - "esc.x ..." is invalid, since predefined escapers return strings, and
   157				//   strings do not have methods, keys or fields.
   158				// - "... .esc" is invalid, since predefined escapers are global functions,
   159				//   not methods or fields of any types.
   160				// Therefore, it is safe to ignore these two node types.
   161				continue
   162			}
   163			ident := node.Ident
   164			if _, ok := predefinedEscapers[ident]; ok {
   165				if pos < len(n.Pipe.Cmds)-1 ||
   166					c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
   167					return context{
   168						state: stateError,
   169						err:   errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
   170					}
   171				}
   172			}
   173		}
   174		s := make([]string, 0, 3)
   175		switch c.state {
   176		case stateError:
   177			return c
   178		case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
   179			switch c.urlPart {
   180			case urlPartNone:
   181				s = append(s, "_html_template_urlfilter")
   182				fallthrough
   183			case urlPartPreQuery:
   184				switch c.state {
   185				case stateCSSDqStr, stateCSSSqStr:
   186					s = append(s, "_html_template_cssescaper")
   187				default:
   188					s = append(s, "_html_template_urlnormalizer")
   189				}
   190			case urlPartQueryOrFrag:
   191				s = append(s, "_html_template_urlescaper")
   192			case urlPartUnknown:
   193				return context{
   194					state: stateError,
   195					err:   errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
   196				}
   197			default:
   198				panic(c.urlPart.String())
   199			}
   200		case stateJS:
   201			s = append(s, "_html_template_jsvalescaper")
   202			// A slash after a value starts a div operator.
   203			c.jsCtx = jsCtxDivOp
   204		case stateJSDqStr, stateJSSqStr:
   205			s = append(s, "_html_template_jsstrescaper")
   206		case stateJSRegexp:
   207			s = append(s, "_html_template_jsregexpescaper")
   208		case stateCSS:
   209			s = append(s, "_html_template_cssvaluefilter")
   210		case stateText:
   211			s = append(s, "_html_template_htmlescaper")
   212		case stateRCDATA:
   213			s = append(s, "_html_template_rcdataescaper")
   214		case stateAttr:
   215			// Handled below in delim check.
   216		case stateAttrName, stateTag:
   217			c.state = stateAttrName
   218			s = append(s, "_html_template_htmlnamefilter")
   219		case stateSrcset:
   220			s = append(s, "_html_template_srcsetescaper")
   221		default:
   222			if isComment(c.state) {
   223				s = append(s, "_html_template_commentescaper")
   224			} else {
   225				panic("unexpected state " + c.state.String())
   226			}
   227		}
   228		switch c.delim {
   229		case delimNone:
   230			// No extra-escaping needed for raw text content.
   231		case delimSpaceOrTagEnd:
   232			s = append(s, "_html_template_nospaceescaper")
   233		default:
   234			s = append(s, "_html_template_attrescaper")
   235		}
   236		e.editActionNode(n, s)
   237		return c
   238	}
   239	
   240	// ensurePipelineContains ensures that the pipeline ends with the commands with
   241	// the identifiers in s in order. If the pipeline ends with a predefined escaper
   242	// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
   243	func ensurePipelineContains(p *parse.PipeNode, s []string) {
   244		if len(s) == 0 {
   245			// Do not rewrite pipeline if we have no escapers to insert.
   246			return
   247		}
   248		// Precondition: p.Cmds contains at most one predefined escaper and the
   249		// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
   250		// always true because of the checks in escapeAction.
   251		pipelineLen := len(p.Cmds)
   252		if pipelineLen > 0 {
   253			lastCmd := p.Cmds[pipelineLen-1]
   254			if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
   255				if esc := idNode.Ident; predefinedEscapers[esc] {
   256					// Pipeline ends with a predefined escaper.
   257					if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
   258						// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
   259						// where esc is the predefined escaper, and arg1...argN are its arguments.
   260						// Convert this into the equivalent form
   261						// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
   262						// merged with the escapers in s.
   263						lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
   264						p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
   265						pipelineLen++
   266					}
   267					// If any of the commands in s that we are about to insert is equivalent
   268					// to the predefined escaper, use the predefined escaper instead.
   269					dup := false
   270					for i, escaper := range s {
   271						if escFnsEq(esc, escaper) {
   272							s[i] = idNode.Ident
   273							dup = true
   274						}
   275					}
   276					if dup {
   277						// The predefined escaper will already be inserted along with the
   278						// escapers in s, so do not copy it to the rewritten pipeline.
   279						pipelineLen--
   280					}
   281				}
   282			}
   283		}
   284		// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
   285		newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
   286		insertedIdents := make(map[string]bool)
   287		for i := 0; i < pipelineLen; i++ {
   288			cmd := p.Cmds[i]
   289			newCmds[i] = cmd
   290			if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
   291				insertedIdents[normalizeEscFn(idNode.Ident)] = true
   292			}
   293		}
   294		for _, name := range s {
   295			if !insertedIdents[normalizeEscFn(name)] {
   296				// When two templates share an underlying parse tree via the use of
   297				// AddParseTree and one template is executed after the other, this check
   298				// ensures that escapers that were already inserted into the pipeline on
   299				// the first escaping pass do not get inserted again.
   300				newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
   301			}
   302		}
   303		p.Cmds = newCmds
   304	}
   305	
// predefinedEscapers contains template predefined escapers that are equivalent
// to some contextual escapers. Keep in sync with equivEscapers.
// escapeAction and ensurePipelineContains look identifiers up in this set.
var predefinedEscapers = map[string]bool{
	"html":     true,
	"urlquery": true,
}
   312	
// equivEscapers matches contextual escapers to equivalent predefined
// template escapers. normalizeEscFn uses it to map a contextual escaper name
// to its predefined counterpart.
var equivEscapers = map[string]string{
	// The following pairs of HTML escapers provide equivalent security
	// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_attrescaper":   "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_rcdataescaper": "html",
	// These two URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986 Section
	// 2.2
	"_html_template_urlescaper": "urlquery",
	// These two functions are not actually equivalent; urlquery is stricter as it
	// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
	// does not. It is therefore only safe to replace _html_template_urlnormalizer
	// with urlquery (this happens in ensurePipelineContains), but not the other
	// way around. We keep this entry around to preserve the behavior of templates
	// written before Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",
}
   333	
   334	// escFnsEq reports whether the two escaping functions are equivalent.
   335	func escFnsEq(a, b string) bool {
   336		return normalizeEscFn(a) == normalizeEscFn(b)
   337	}
   338	
   339	// normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of
   340	// escaper functions a and b that are equivalent.
   341	func normalizeEscFn(e string) string {
   342		if norm := equivEscapers[e]; norm != "" {
   343			return norm
   344		}
   345		return e
   346	}
   347	
// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x. appendCmd consults it to avoid appending a command b directly
// after a command a when b would have no further effect.
var redundantFuncs = map[string]map[string]bool{
	"_html_template_commentescaper": {
		"_html_template_attrescaper":    true,
		"_html_template_nospaceescaper": true,
		"_html_template_htmlescaper":    true,
	},
	"_html_template_cssescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsregexpescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsstrescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_urlescaper": {
		"_html_template_urlnormalizer": true,
	},
}
   369	
   370	// appendCmd appends the given command to the end of the command pipeline
   371	// unless it is redundant with the last command.
   372	func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
   373		if n := len(cmds); n != 0 {
   374			last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
   375			next, okNext := cmd.Args[0].(*parse.IdentifierNode)
   376			if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
   377				return cmds
   378			}
   379		}
   380		return append(cmds, cmd)
   381	}
   382	
   383	// newIdentCmd produces a command containing a single identifier node.
   384	func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
   385		return &parse.CommandNode{
   386			NodeType: parse.NodeCommand,
   387			Args:     []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
   388		}
   389	}
   390	
   391	// nudge returns the context that would result from following empty string
   392	// transitions from the input context.
   393	// For example, parsing:
   394	//     `<a href=`
   395	// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
   396	//     `<a href=x`
   397	// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
   398	// There are two transitions that happen when the 'x' is seen:
   399	// (1) Transition from a before-value state to a start-of-value state without
   400	//     consuming any character.
   401	// (2) Consume 'x' and transition past the first value character.
   402	// In this case, nudging produces the context after (1) happens.
   403	func nudge(c context) context {
   404		switch c.state {
   405		case stateTag:
   406			// In `<foo {{.}}`, the action should emit an attribute.
   407			c.state = stateAttrName
   408		case stateBeforeValue:
   409			// In `<foo bar={{.}}`, the action is an undelimited value.
   410			c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
   411		case stateAfterName:
   412			// In `<foo bar {{.}}`, the action is an attribute name.
   413			c.state, c.attr = stateAttrName, attrNone
   414		}
   415		return c
   416	}
   417	
   418	// join joins the two contexts of a branch template node. The result is an
   419	// error context if either of the input contexts are error contexts, or if the
   420	// input contexts differ.
   421	func join(a, b context, node parse.Node, nodeName string) context {
   422		if a.state == stateError {
   423			return a
   424		}
   425		if b.state == stateError {
   426			return b
   427		}
   428		if a.eq(b) {
   429			return a
   430		}
   431	
   432		c := a
   433		c.urlPart = b.urlPart
   434		if c.eq(b) {
   435			// The contexts differ only by urlPart.
   436			c.urlPart = urlPartUnknown
   437			return c
   438		}
   439	
   440		c = a
   441		c.jsCtx = b.jsCtx
   442		if c.eq(b) {
   443			// The contexts differ only by jsCtx.
   444			c.jsCtx = jsCtxUnknown
   445			return c
   446		}
   447	
   448		// Allow a nudged context to join with an unnudged one.
   449		// This means that
   450		//   <p title={{if .C}}{{.}}{{end}}
   451		// ends in an unquoted value state even though the else branch
   452		// ends in stateBeforeValue.
   453		if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
   454			if e := join(c, d, node, nodeName); e.state != stateError {
   455				return e
   456			}
   457		}
   458	
   459		return context{
   460			state: stateError,
   461			err:   errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
   462		}
   463	}
   464	
   465	// escapeBranch escapes a branch template node: "if", "range" and "with".
   466	func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
   467		c0 := e.escapeList(c, n.List)
   468		if nodeName == "range" && c0.state != stateError {
   469			// The "true" branch of a "range" node can execute multiple times.
   470			// We check that executing n.List once results in the same context
   471			// as executing n.List twice.
   472			c1, _ := e.escapeListConditionally(c0, n.List, nil)
   473			c0 = join(c0, c1, n, nodeName)
   474			if c0.state == stateError {
   475				// Make clear that this is a problem on loop re-entry
   476				// since developers tend to overlook that branch when
   477				// debugging templates.
   478				c0.err.Line = n.Line
   479				c0.err.Description = "on range loop re-entry: " + c0.err.Description
   480				return c0
   481			}
   482		}
   483		c1 := e.escapeList(c, n.ElseList)
   484		return join(c0, c1, n, nodeName)
   485	}
   486	
   487	// escapeList escapes a list template node.
   488	func (e *escaper) escapeList(c context, n *parse.ListNode) context {
   489		if n == nil {
   490			return c
   491		}
   492		for _, m := range n.Nodes {
   493			c = e.escape(c, m)
   494		}
   495		return c
   496	}
   497	
   498	// escapeListConditionally escapes a list node but only preserves edits and
   499	// inferences in e if the inferences and output context satisfy filter.
   500	// It returns the best guess at an output context, and the result of the filter
   501	// which is the same as whether e was updated.
   502	func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
   503		e1 := makeEscaper(e.ns)
   504		// Make type inferences available to f.
   505		for k, v := range e.output {
   506			e1.output[k] = v
   507		}
   508		c = e1.escapeList(c, n)
   509		ok := filter != nil && filter(&e1, c)
   510		if ok {
   511			// Copy inferences and edits from e1 back into e.
   512			for k, v := range e1.output {
   513				e.output[k] = v
   514			}
   515			for k, v := range e1.derived {
   516				e.derived[k] = v
   517			}
   518			for k, v := range e1.called {
   519				e.called[k] = v
   520			}
   521			for k, v := range e1.actionNodeEdits {
   522				e.editActionNode(k, v)
   523			}
   524			for k, v := range e1.templateNodeEdits {
   525				e.editTemplateNode(k, v)
   526			}
   527			for k, v := range e1.textNodeEdits {
   528				e.editTextNode(k, v)
   529			}
   530		}
   531		return c, ok
   532	}
   533	
   534	// escapeTemplate escapes a {{template}} call node.
   535	func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
   536		c, name := e.escapeTree(c, n, n.Name, n.Line)
   537		if name != n.Name {
   538			e.editTemplateNode(n, name)
   539		}
   540		return c
   541	}
   542	
   543	// escapeTree escapes the named template starting in the given context as
   544	// necessary and returns its output context.
   545	func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) {
   546		// Mangle the template name with the input context to produce a reliable
   547		// identifier.
   548		dname := c.mangle(name)
   549		e.called[dname] = true
   550		if out, ok := e.output[dname]; ok {
   551			// Already escaped.
   552			return out, dname
   553		}
   554		t := e.template(name)
   555		if t == nil {
   556			// Two cases: The template exists but is empty, or has never been mentioned at
   557			// all. Distinguish the cases in the error messages.
   558			if e.ns.set[name] != nil {
   559				return context{
   560					state: stateError,
   561					err:   errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name),
   562				}, dname
   563			}
   564			return context{
   565				state: stateError,
   566				err:   errorf(ErrNoSuchTemplate, node, line, "no such template %q", name),
   567			}, dname
   568		}
   569		if dname != name {
   570			// Use any template derived during an earlier call to escapeTemplate
   571			// with different top level templates, or clone if necessary.
   572			dt := e.template(dname)
   573			if dt == nil {
   574				dt = template.New(dname)
   575				dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()}
   576				e.derived[dname] = dt
   577			}
   578			t = dt
   579		}
   580		return e.computeOutCtx(c, t), dname
   581	}
   582	
   583	// computeOutCtx takes a template and its start context and computes the output
   584	// context while storing any inferences in e.
   585	func (e *escaper) computeOutCtx(c context, t *template.Template) context {
   586		// Propagate context over the body.
   587		c1, ok := e.escapeTemplateBody(c, t)
   588		if !ok {
   589			// Look for a fixed point by assuming c1 as the output context.
   590			if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
   591				c1, ok = c2, true
   592			}
   593			// Use c1 as the error context if neither assumption worked.
   594		}
   595		if !ok && c1.state != stateError {
   596			return context{
   597				state: stateError,
   598				err:   errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()),
   599			}
   600		}
   601		return c1
   602	}
   603	
   604	// escapeTemplateBody escapes the given template assuming the given output
   605	// context, and returns the best guess at the output context and whether the
   606	// assumption was correct.
   607	func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
   608		filter := func(e1 *escaper, c1 context) bool {
   609			if c1.state == stateError {
   610				// Do not update the input escaper, e.
   611				return false
   612			}
   613			if !e1.called[t.Name()] {
   614				// If t is not recursively called, then c1 is an
   615				// accurate output context.
   616				return true
   617			}
   618			// c1 is accurate if it matches our assumed output context.
   619			return c.eq(c1)
   620		}
   621		// We need to assume an output context so that recursive template calls
   622		// take the fast path out of escapeTree instead of infinitely recursing.
   623		// Naively assuming that the input context is the same as the output
   624		// works >90% of the time.
   625		e.output[t.Name()] = c
   626		return e.escapeListConditionally(c, t.Tree.Root, filter)
   627	}
   628	
// delimEnds maps each delim to a string of characters that terminate it.
// contextAfterText passes the string for the current delimiter to
// bytes.IndexAny to locate the end of an attribute value.
var delimEnds = [...]string{
	delimDoubleQuote: `"`,
	delimSingleQuote: "'",
	// Determined empirically by running the below in various browsers.
	// var div = document.createElement("DIV");
	// for (var i = 0; i < 0x10000; ++i) {
	//   div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
	//   if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
	//     document.write("<p>U+" + i.toString(16));
	// }
	delimSpaceOrTagEnd: " \t\n\f\r>",
}
   642	
// doctypeBytes is the upper-case prefix that escapeText looks for, after
// upper-casing the text, so that the '<' opening a doctype declaration is
// not rewritten to "&lt;".
var doctypeBytes = []byte("<!DOCTYPE")
   644	
   645	// escapeText escapes a text template node.
   646	func (e *escaper) escapeText(c context, n *parse.TextNode) context {
   647		s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
   648		for i != len(s) {
   649			c1, nread := contextAfterText(c, s[i:])
   650			i1 := i + nread
   651			if c.state == stateText || c.state == stateRCDATA {
   652				end := i1
   653				if c1.state != c.state {
   654					for j := end - 1; j >= i; j-- {
   655						if s[j] == '<' {
   656							end = j
   657							break
   658						}
   659					}
   660				}
   661				for j := i; j < end; j++ {
   662					if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) {
   663						b.Write(s[written:j])
   664						b.WriteString("&lt;")
   665						written = j + 1
   666					}
   667				}
   668			} else if isComment(c.state) && c.delim == delimNone {
   669				switch c.state {
   670				case stateJSBlockCmt:
   671					// https://es5.github.com/#x7.4:
   672					// "Comments behave like white space and are
   673					// discarded except that, if a MultiLineComment
   674					// contains a line terminator character, then
   675					// the entire comment is considered to be a
   676					// LineTerminator for purposes of parsing by
   677					// the syntactic grammar."
   678					if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") {
   679						b.WriteByte('\n')
   680					} else {
   681						b.WriteByte(' ')
   682					}
   683				case stateCSSBlockCmt:
   684					b.WriteByte(' ')
   685				}
   686				written = i1
   687			}
   688			if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
   689				// Preserve the portion between written and the comment start.
   690				cs := i1 - 2
   691				if c1.state == stateHTMLCmt {
   692					// "<!--" instead of "/*" or "//"
   693					cs -= 2
   694				}
   695				b.Write(s[written:cs])
   696				written = i1
   697			}
   698			if i == i1 && c.state == c1.state {
   699				panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
   700			}
   701			c, i = c1, i1
   702		}
   703	
   704		if written != 0 && c.state != stateError {
   705			if !isComment(c.state) || c.delim != delimNone {
   706				b.Write(n.Text[written:])
   707			}
   708			e.editTextNode(n, b.Bytes())
   709		}
   710		return c
   711	}
   712	
   713	// contextAfterText starts in context c, consumes some tokens from the front of
   714	// s, then returns the context after those tokens and the unprocessed suffix.
   715	func contextAfterText(c context, s []byte) (context, int) {
   716		if c.delim == delimNone {
   717			c1, i := tSpecialTagEnd(c, s)
   718			if i == 0 {
   719				// A special end tag (`</script>`) has been seen and
   720				// all content preceding it has been consumed.
   721				return c1, 0
   722			}
   723			// Consider all content up to any end tag.
   724			return transitionFunc[c.state](c, s[:i])
   725		}
   726	
   727		// We are at the beginning of an attribute value.
   728	
   729		i := bytes.IndexAny(s, delimEnds[c.delim])
   730		if i == -1 {
   731			i = len(s)
   732		}
   733		if c.delim == delimSpaceOrTagEnd {
   734			// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
   735			// lists the runes below as error characters.
   736			// Error out because HTML parsers may differ on whether
   737			// "<a id= onclick=f("     ends inside id's or onclick's value,
   738			// "<a class=`foo "        ends inside a value,
   739			// "<a style=font:'Arial'" needs open-quote fixup.
   740			// IE treats '`' as a quotation character.
   741			if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
   742				return context{
   743					state: stateError,
   744					err:   errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
   745				}, len(s)
   746			}
   747		}
   748		if i == len(s) {
   749			// Remain inside the attribute.
   750			// Decode the value so non-HTML rules can easily handle
   751			//     <button onclick="alert(&quot;Hi!&quot;)">
   752			// without having to entity decode token boundaries.
   753			for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
   754				c1, i1 := transitionFunc[c.state](c, u)
   755				c, u = c1, u[i1:]
   756			}
   757			return c, len(s)
   758		}
   759	
   760		element := c.element
   761	
   762		// If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS.
   763		if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) {
   764			element = elementNone
   765		}
   766	
   767		if c.delim != delimSpaceOrTagEnd {
   768			// Consume any quote.
   769			i++
   770		}
   771		// On exiting an attribute, we discard all state information
   772		// except the state and element.
   773		return context{state: stateTag, element: element}, i
   774	}
   775	
   776	// editActionNode records a change to an action pipeline for later commit.
   777	func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
   778		if _, ok := e.actionNodeEdits[n]; ok {
   779			panic(fmt.Sprintf("node %s shared between templates", n))
   780		}
   781		e.actionNodeEdits[n] = cmds
   782	}
   783	
   784	// editTemplateNode records a change to a {{template}} callee for later commit.
   785	func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
   786		if _, ok := e.templateNodeEdits[n]; ok {
   787			panic(fmt.Sprintf("node %s shared between templates", n))
   788		}
   789		e.templateNodeEdits[n] = callee
   790	}
   791	
   792	// editTextNode records a change to a text node for later commit.
   793	func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
   794		if _, ok := e.textNodeEdits[n]; ok {
   795			panic(fmt.Sprintf("node %s shared between templates", n))
   796		}
   797		e.textNodeEdits[n] = text
   798	}
   799	
   800	// commit applies changes to actions and template calls needed to contextually
   801	// autoescape content and adds any derived templates to the set.
   802	func (e *escaper) commit() {
   803		for name := range e.output {
   804			e.template(name).Funcs(funcMap)
   805		}
   806		// Any template from the name space associated with this escaper can be used
   807		// to add derived templates to the underlying text/template name space.
   808		tmpl := e.arbitraryTemplate()
   809		for _, t := range e.derived {
   810			if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil {
   811				panic("error adding derived template")
   812			}
   813		}
   814		for n, s := range e.actionNodeEdits {
   815			ensurePipelineContains(n.Pipe, s)
   816		}
   817		for n, name := range e.templateNodeEdits {
   818			n.Name = name
   819		}
   820		for n, s := range e.textNodeEdits {
   821			n.Text = s
   822		}
   823		// Reset state that is specific to this commit so that the same changes are
   824		// not re-applied to the template on subsequent calls to commit.
   825		e.called = make(map[string]bool)
   826		e.actionNodeEdits = make(map[*parse.ActionNode][]string)
   827		e.templateNodeEdits = make(map[*parse.TemplateNode]string)
   828		e.textNodeEdits = make(map[*parse.TextNode][]byte)
   829	}
   830	
   831	// template returns the named template given a mangled template name.
   832	func (e *escaper) template(name string) *template.Template {
   833		// Any template from the name space associated with this escaper can be used
   834		// to look up templates in the underlying text/template name space.
   835		t := e.arbitraryTemplate().text.Lookup(name)
   836		if t == nil {
   837			t = e.derived[name]
   838		}
   839		return t
   840	}
   841	
   842	// arbitraryTemplate returns an arbitrary template from the name space
   843	// associated with e and panics if no templates are found.
   844	func (e *escaper) arbitraryTemplate() *Template {
   845		for _, t := range e.ns.set {
   846			return t
   847		}
   848		panic("no templates in name space")
   849	}
   850	
   851	// Forwarding functions so that clients need only import this package
   852	// to reach the general escaping functions of text/template.
   853	
   854	// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
   855	func HTMLEscape(w io.Writer, b []byte) {
   856		template.HTMLEscape(w, b)
   857	}
   858	
   859	// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
   860	func HTMLEscapeString(s string) string {
   861		return template.HTMLEscapeString(s)
   862	}
   863	
   864	// HTMLEscaper returns the escaped HTML equivalent of the textual
   865	// representation of its arguments.
   866	func HTMLEscaper(args ...interface{}) string {
   867		return template.HTMLEscaper(args...)
   868	}
   869	
   870	// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
   871	func JSEscape(w io.Writer, b []byte) {
   872		template.JSEscape(w, b)
   873	}
   874	
   875	// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
   876	func JSEscapeString(s string) string {
   877		return template.JSEscapeString(s)
   878	}
   879	
   880	// JSEscaper returns the escaped JavaScript equivalent of the textual
   881	// representation of its arguments.
   882	func JSEscaper(args ...interface{}) string {
   883		return template.JSEscaper(args...)
   884	}
   885	
   886	// URLQueryEscaper returns the escaped value of the textual representation of
   887	// its arguments in a form suitable for embedding in a URL query.
   888	func URLQueryEscaper(args ...interface{}) string {
   889		return template.URLQueryEscaper(args...)
   890	}
   891	

View as plain text