...

Source file src/vendor/golang.org/x/net/http/httpguts/httplex.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package httpguts
     6	
     7	import (
     8		"net"
     9		"strings"
    10		"unicode/utf8"
    11	
    12		"golang.org/x/net/idna"
    13	)
    14	
    15	var isTokenTable = [127]bool{
    16		'!':  true,
    17		'#':  true,
    18		'$':  true,
    19		'%':  true,
    20		'&':  true,
    21		'\'': true,
    22		'*':  true,
    23		'+':  true,
    24		'-':  true,
    25		'.':  true,
    26		'0':  true,
    27		'1':  true,
    28		'2':  true,
    29		'3':  true,
    30		'4':  true,
    31		'5':  true,
    32		'6':  true,
    33		'7':  true,
    34		'8':  true,
    35		'9':  true,
    36		'A':  true,
    37		'B':  true,
    38		'C':  true,
    39		'D':  true,
    40		'E':  true,
    41		'F':  true,
    42		'G':  true,
    43		'H':  true,
    44		'I':  true,
    45		'J':  true,
    46		'K':  true,
    47		'L':  true,
    48		'M':  true,
    49		'N':  true,
    50		'O':  true,
    51		'P':  true,
    52		'Q':  true,
    53		'R':  true,
    54		'S':  true,
    55		'T':  true,
    56		'U':  true,
    57		'W':  true,
    58		'V':  true,
    59		'X':  true,
    60		'Y':  true,
    61		'Z':  true,
    62		'^':  true,
    63		'_':  true,
    64		'`':  true,
    65		'a':  true,
    66		'b':  true,
    67		'c':  true,
    68		'd':  true,
    69		'e':  true,
    70		'f':  true,
    71		'g':  true,
    72		'h':  true,
    73		'i':  true,
    74		'j':  true,
    75		'k':  true,
    76		'l':  true,
    77		'm':  true,
    78		'n':  true,
    79		'o':  true,
    80		'p':  true,
    81		'q':  true,
    82		'r':  true,
    83		's':  true,
    84		't':  true,
    85		'u':  true,
    86		'v':  true,
    87		'w':  true,
    88		'x':  true,
    89		'y':  true,
    90		'z':  true,
    91		'|':  true,
    92		'~':  true,
    93	}
    94	
    95	func IsTokenRune(r rune) bool {
    96		i := int(r)
    97		return i < len(isTokenTable) && isTokenTable[i]
    98	}
    99	
   100	func isNotToken(r rune) bool {
   101		return !IsTokenRune(r)
   102	}
   103	
   104	// HeaderValuesContainsToken reports whether any string in values
   105	// contains the provided token, ASCII case-insensitively.
   106	func HeaderValuesContainsToken(values []string, token string) bool {
   107		for _, v := range values {
   108			if headerValueContainsToken(v, token) {
   109				return true
   110			}
   111		}
   112		return false
   113	}
   114	
   115	// isOWS reports whether b is an optional whitespace byte, as defined
   116	// by RFC 7230 section 3.2.3.
   117	func isOWS(b byte) bool { return b == ' ' || b == '\t' }
   118	
   119	// trimOWS returns x with all optional whitespace removes from the
   120	// beginning and end.
   121	func trimOWS(x string) string {
   122		// TODO: consider using strings.Trim(x, " \t") instead,
   123		// if and when it's fast enough. See issue 10292.
   124		// But this ASCII-only code will probably always beat UTF-8
   125		// aware code.
   126		for len(x) > 0 && isOWS(x[0]) {
   127			x = x[1:]
   128		}
   129		for len(x) > 0 && isOWS(x[len(x)-1]) {
   130			x = x[:len(x)-1]
   131		}
   132		return x
   133	}
   134	
   135	// headerValueContainsToken reports whether v (assumed to be a
   136	// 0#element, in the ABNF extension described in RFC 7230 section 7)
   137	// contains token amongst its comma-separated tokens, ASCII
   138	// case-insensitively.
   139	func headerValueContainsToken(v string, token string) bool {
   140		v = trimOWS(v)
   141		if comma := strings.IndexByte(v, ','); comma != -1 {
   142			return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
   143		}
   144		return tokenEqual(v, token)
   145	}
   146	
   147	// lowerASCII returns the ASCII lowercase version of b.
   148	func lowerASCII(b byte) byte {
   149		if 'A' <= b && b <= 'Z' {
   150			return b + ('a' - 'A')
   151		}
   152		return b
   153	}
   154	
   155	// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
   156	func tokenEqual(t1, t2 string) bool {
   157		if len(t1) != len(t2) {
   158			return false
   159		}
   160		for i, b := range t1 {
   161			if b >= utf8.RuneSelf {
   162				// No UTF-8 or non-ASCII allowed in tokens.
   163				return false
   164			}
   165			if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
   166				return false
   167			}
   168		}
   169		return true
   170	}
   171	
   172	// isLWS reports whether b is linear white space, according
   173	// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
   174	//      LWS            = [CRLF] 1*( SP | HT )
   175	func isLWS(b byte) bool { return b == ' ' || b == '\t' }
   176	
   177	// isCTL reports whether b is a control byte, according
   178	// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
   179	//      CTL            = <any US-ASCII control character
   180	//                       (octets 0 - 31) and DEL (127)>
   181	func isCTL(b byte) bool {
   182		const del = 0x7f // a CTL
   183		return b < ' ' || b == del
   184	}
   185	
   186	// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
   187	// HTTP/2 imposes the additional restriction that uppercase ASCII
   188	// letters are not allowed.
   189	//
   190	//  RFC 7230 says:
   191	//   header-field   = field-name ":" OWS field-value OWS
   192	//   field-name     = token
   193	//   token          = 1*tchar
   194	//   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   195	//           "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   196	func ValidHeaderFieldName(v string) bool {
   197		if len(v) == 0 {
   198			return false
   199		}
   200		for _, r := range v {
   201			if !IsTokenRune(r) {
   202				return false
   203			}
   204		}
   205		return true
   206	}
   207	
   208	// ValidHostHeader reports whether h is a valid host header.
   209	func ValidHostHeader(h string) bool {
   210		// The latest spec is actually this:
   211		//
   212		// http://tools.ietf.org/html/rfc7230#section-5.4
   213		//     Host = uri-host [ ":" port ]
   214		//
   215		// Where uri-host is:
   216		//     http://tools.ietf.org/html/rfc3986#section-3.2.2
   217		//
   218		// But we're going to be much more lenient for now and just
   219		// search for any byte that's not a valid byte in any of those
   220		// expressions.
   221		for i := 0; i < len(h); i++ {
   222			if !validHostByte[h[i]] {
   223				return false
   224			}
   225		}
   226		return true
   227	}
   228	
   229	// See the validHostHeader comment.
   230	var validHostByte = [256]bool{
   231		'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
   232		'8': true, '9': true,
   233	
   234		'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
   235		'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
   236		'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
   237		'y': true, 'z': true,
   238	
   239		'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
   240		'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
   241		'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
   242		'Y': true, 'Z': true,
   243	
   244		'!':  true, // sub-delims
   245		'$':  true, // sub-delims
   246		'%':  true, // pct-encoded (and used in IPv6 zones)
   247		'&':  true, // sub-delims
   248		'(':  true, // sub-delims
   249		')':  true, // sub-delims
   250		'*':  true, // sub-delims
   251		'+':  true, // sub-delims
   252		',':  true, // sub-delims
   253		'-':  true, // unreserved
   254		'.':  true, // unreserved
   255		':':  true, // IPv6address + Host expression's optional port
   256		';':  true, // sub-delims
   257		'=':  true, // sub-delims
   258		'[':  true,
   259		'\'': true, // sub-delims
   260		']':  true,
   261		'_':  true, // unreserved
   262		'~':  true, // unreserved
   263	}
   264	
   265	// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
   266	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
   267	//
   268	//        message-header = field-name ":" [ field-value ]
   269	//        field-value    = *( field-content | LWS )
   270	//        field-content  = <the OCTETs making up the field-value
   271	//                         and consisting of either *TEXT or combinations
   272	//                         of token, separators, and quoted-string>
   273	//
   274	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
   275	//
   276	//        TEXT           = <any OCTET except CTLs,
   277	//                          but including LWS>
   278	//        LWS            = [CRLF] 1*( SP | HT )
   279	//        CTL            = <any US-ASCII control character
   280	//                         (octets 0 - 31) and DEL (127)>
   281	//
   282	// RFC 7230 says:
   283	//  field-value    = *( field-content / obs-fold )
   284	//  obj-fold       =  N/A to http2, and deprecated
   285	//  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   286	//  field-vchar    = VCHAR / obs-text
   287	//  obs-text       = %x80-FF
   288	//  VCHAR          = "any visible [USASCII] character"
   289	//
   290	// http2 further says: "Similarly, HTTP/2 allows header field values
   291	// that are not valid. While most of the values that can be encoded
   292	// will not alter header field parsing, carriage return (CR, ASCII
   293	// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
   294	// 0x0) might be exploited by an attacker if they are translated
   295	// verbatim. Any request or response that contains a character not
   296	// permitted in a header field value MUST be treated as malformed
   297	// (Section 8.1.2.6). Valid characters are defined by the
   298	// field-content ABNF rule in Section 3.2 of [RFC7230]."
   299	//
   300	// This function does not (yet?) properly handle the rejection of
   301	// strings that begin or end with SP or HTAB.
   302	func ValidHeaderFieldValue(v string) bool {
   303		for i := 0; i < len(v); i++ {
   304			b := v[i]
   305			if isCTL(b) && !isLWS(b) {
   306				return false
   307			}
   308		}
   309		return true
   310	}
   311	
   312	func isASCII(s string) bool {
   313		for i := 0; i < len(s); i++ {
   314			if s[i] >= utf8.RuneSelf {
   315				return false
   316			}
   317		}
   318		return true
   319	}
   320	
   321	// PunycodeHostPort returns the IDNA Punycode version
   322	// of the provided "host" or "host:port" string.
   323	func PunycodeHostPort(v string) (string, error) {
   324		if isASCII(v) {
   325			return v, nil
   326		}
   327	
   328		host, port, err := net.SplitHostPort(v)
   329		if err != nil {
   330			// The input 'v' argument was just a "host" argument,
   331			// without a port. This error should not be returned
   332			// to the caller.
   333			host = v
   334			port = ""
   335		}
   336		host, err = idna.ToASCII(host)
   337		if err != nil {
   338			// Non-UTF-8? Not representable in Punycode, in any
   339			// case.
   340			return "", err
   341		}
   342		if port == "" {
   343			return host, nil
   344		}
   345		return net.JoinHostPort(host, port), nil
   346	}
   347	

View as plain text