...

Source file src/pkg/net/url/url.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package url parses URLs and implements query escaping.
     6	package url
     7	
     8	// See RFC 3986. This package generally follows RFC 3986, except where
     9	// it deviates for compatibility reasons. When sending changes, first
    10	// search old issues for history on decisions. Unit tests should also
    11	// contain references to issue numbers with details.
    12	
    13	import (
    14		"errors"
    15		"fmt"
    16		"sort"
    17		"strconv"
    18		"strings"
    19	)
    20	
    21	// Error reports an error and the operation and URL that caused it.
    22	type Error struct {
    23		Op  string
    24		URL string
    25		Err error
    26	}
    27	
    28	func (e *Error) Unwrap() error { return e.Err }
    29	func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
    30	
    31	func (e *Error) Timeout() bool {
    32		t, ok := e.Err.(interface {
    33			Timeout() bool
    34		})
    35		return ok && t.Timeout()
    36	}
    37	
    38	func (e *Error) Temporary() bool {
    39		t, ok := e.Err.(interface {
    40			Temporary() bool
    41		})
    42		return ok && t.Temporary()
    43	}
    44	
    45	func ishex(c byte) bool {
    46		switch {
    47		case '0' <= c && c <= '9':
    48			return true
    49		case 'a' <= c && c <= 'f':
    50			return true
    51		case 'A' <= c && c <= 'F':
    52			return true
    53		}
    54		return false
    55	}
    56	
    57	func unhex(c byte) byte {
    58		switch {
    59		case '0' <= c && c <= '9':
    60			return c - '0'
    61		case 'a' <= c && c <= 'f':
    62			return c - 'a' + 10
    63		case 'A' <= c && c <= 'F':
    64			return c - 'A' + 10
    65		}
    66		return 0
    67	}
    68	
// encoding identifies which component of a URL is being escaped or
// unescaped. shouldEscape and unescape vary their rules by this mode.
type encoding int

// Escaping modes. Values start at 1, so the zero value of encoding
// matches none of them.
const (
	// encodePath is used for a path as a whole.
	encodePath encoding = 1 + iota
	// encodePathSegment is used for a single path segment.
	encodePathSegment
	// encodeHost is used for a host (including any :port part).
	encodeHost
	// encodeZone is used for the zone identifier of an IPv6 literal.
	encodeZone
	// encodeUserPassword is used for a username or a password.
	encodeUserPassword
	// encodeQueryComponent is used for a query key or value.
	encodeQueryComponent
	// encodeFragment is used for a fragment.
	encodeFragment
)
    80	
    81	type EscapeError string
    82	
    83	func (e EscapeError) Error() string {
    84		return "invalid URL escape " + strconv.Quote(string(e))
    85	}
    86	
    87	type InvalidHostError string
    88	
    89	func (e InvalidHostError) Error() string {
    90		return "invalid character " + strconv.Quote(string(e)) + " in host name"
    91	}
    92	
// shouldEscape reports whether the byte c must be percent-escaped when
// it appears in the URL component identified by mode, according to
// RFC 3986.
//
// ASCII letters and digits are never escaped. Every byte that is not
// explicitly allowed below (including all bytes >= 0x80) is escaped.
//
// Please be informed that for now shouldEscape does not check all
// reserved characters correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
	// §2.3 Unreserved characters (alphanum)
	if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
		return false
	}

	if mode == encodeHost || mode == encodeZone {
		// §3.2.2 Host allows
		//	sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
		// as part of reg-name.
		// We add : because we include :port as part of host.
		// We add [ ] because we include [ipv6]:port as part of host.
		// We add < > because they're the only characters left that
		// we could possibly allow, and Parse will reject them if we
		// escape them (because hosts can't use %-encoding for
		// ASCII bytes).
		// The double quote is likewise left unescaped here.
		switch c {
		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
			return false
		}
	}

	switch c {
	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
		return false

	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
		// Different sections of the URL allow a few of
		// the reserved characters to appear unescaped.
		// For modes not listed in this inner switch, control falls
		// through to the end of the function and c is escaped.
		switch mode {
		case encodePath: // §3.3
			// The RFC allows : @ & = + $ but saves / ; , for assigning
			// meaning to individual path segments. This package
			// only manipulates the path as a whole, so we allow those
			// last three as well. That leaves only ? to escape.
			return c == '?'

		case encodePathSegment: // §3.3
			// The RFC allows : @ & = + $ but saves / ; , for assigning
			// meaning to individual path segments.
			return c == '/' || c == ';' || c == ',' || c == '?'

		case encodeUserPassword: // §3.2.1
			// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
			// userinfo, so we must escape only '@', '/', and '?'.
			// The parsing of userinfo treats ':' as special so we must escape
			// that too.
			return c == '@' || c == '/' || c == '?' || c == ':'

		case encodeQueryComponent: // §3.4
			// The RFC reserves (so we must escape) everything.
			return true

		case encodeFragment: // §4.1
			// The RFC text is silent but the grammar allows
			// everything, so escape nothing.
			return false
		}
	}

	if mode == encodeFragment {
		// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
		// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
		// need to be escaped. To minimize potential breakage, we apply two restrictions:
		// (1) we always escape sub-delims outside of the fragment, and (2) we always
		// escape single quote to avoid breaking callers that had previously assumed that
		// single quotes would be escaped. See issue #19917.
		switch c {
		case '!', '(', ')', '*':
			return false
		}
	}

	// Everything else must be escaped.
	return true
}
   174	
// QueryUnescape does the inverse transformation of QueryEscape,
// converting each 3-byte encoded substring of the form "%AB" into the
// hex-decoded byte 0xAB and each '+' into ' ' (space).
// It returns an error if any % is not followed by two hexadecimal
// digits.
func QueryUnescape(s string) (string, error) {
	// Query-component mode is the mode in which unescape maps '+' to space.
	return unescape(s, encodeQueryComponent)
}
   183	
// PathUnescape does the inverse transformation of PathEscape,
// converting each 3-byte encoded substring of the form "%AB" into the
// hex-decoded byte 0xAB. It returns an error if any % is not followed
// by two hexadecimal digits.
//
// PathUnescape is identical to QueryUnescape except that it does not
// unescape '+' to ' ' (space).
func PathUnescape(s string) (string, error) {
	// Path-segment mode leaves '+' untouched in unescape.
	return unescape(s, encodePathSegment)
}
   194	
// unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
//
// It works in two passes. The first pass validates s and counts the
// percent-escapes; the second builds the decoded result. If s contains
// no '%' (and, in query-component mode, no '+'), s is returned as is.
//
// It returns an EscapeError when a '%' is not followed by two
// hexadecimal digits, or when the escape is not permitted in host or
// zone mode. It returns an InvalidHostError when, in host or zone mode,
// s contains an unescaped ASCII byte that shouldEscape rejects.
func unescape(s string, mode encoding) (string, error) {
	// Count %, check that they're well-formed.
	n := 0
	hasPlus := false
	for i := 0; i < len(s); {
		switch s[i] {
		case '%':
			n++
			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
				// Report at most the three bytes starting at the bad '%'.
				s = s[i:]
				if len(s) > 3 {
					s = s[:3]
				}
				return "", EscapeError(s)
			}
			// Per https://tools.ietf.org/html/rfc3986#page-21
			// in the host component %-encoding can only be used
			// for non-ASCII bytes.
			// But https://tools.ietf.org/html/rfc6874#section-2
			// introduces %25 being allowed to escape a percent sign
			// in IPv6 scoped-address literals. Yay.
			// A first hex digit below 8 means the decoded byte is < 0x80.
			if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
				return "", EscapeError(s[i : i+3])
			}
			if mode == encodeZone {
				// RFC 6874 says basically "anything goes" for zone identifiers
				// and that even non-ASCII can be redundantly escaped,
				// but it seems prudent to restrict %-escaped bytes here to those
				// that are valid host name bytes in their unescaped form.
				// That is, you can use escaping in the zone identifier but not
				// to introduce bytes you couldn't just write directly.
				// But Windows puts spaces here! Yay.
				v := unhex(s[i+1])<<4 | unhex(s[i+2])
				if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
					return "", EscapeError(s[i : i+3])
				}
			}
			i += 3
		case '+':
			// '+' only needs rewriting in query-component mode.
			hasPlus = mode == encodeQueryComponent
			i++
		default:
			if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
				return "", InvalidHostError(s[i : i+1])
			}
			i++
		}
	}

	// Nothing to decode: return the input unchanged.
	if n == 0 && !hasPlus {
		return s, nil
	}

	// Each of the n escapes shrinks from three bytes to one.
	var t strings.Builder
	t.Grow(len(s) - 2*n)
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case '%':
			// The first pass guarantees two hex digits follow.
			t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
			i += 2
		case '+':
			if mode == encodeQueryComponent {
				t.WriteByte(' ')
			} else {
				t.WriteByte('+')
			}
		default:
			t.WriteByte(s[i])
		}
	}
	return t.String(), nil
}
   269	
// QueryEscape escapes the string so it can be safely placed
// inside a URL query. Spaces are written as '+'; other bytes that
// need escaping are written as %XX sequences.
func QueryEscape(s string) string {
	return escape(s, encodeQueryComponent)
}
   275	
// PathEscape escapes the string so it can be safely placed inside a URL path segment,
// replacing special characters (including /) with %XX sequences as needed.
func PathEscape(s string) string {
	// Path-segment mode is what makes '/' itself get escaped.
	return escape(s, encodePathSegment)
}
   281	
// escape returns s with every byte for which shouldEscape(c, mode) is
// true replaced by an upper-case %XX sequence. In query-component mode
// a space is written as '+' instead. If no byte needs escaping, s is
// returned unchanged.
func escape(s string, mode encoding) string {
	// First pass: count how many bytes become '+' and how many become %XX.
	spaceCount, hexCount := 0, 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if shouldEscape(c, mode) {
			if c == ' ' && mode == encodeQueryComponent {
				spaceCount++
			} else {
				hexCount++
			}
		}
	}

	if spaceCount == 0 && hexCount == 0 {
		return s
	}

	// Results of up to 64 bytes are built in a fixed-size array rather
	// than a slice obtained from make.
	var buf [64]byte
	var t []byte

	// Each %XX escape adds two bytes; a '+' replaces a space one for one.
	required := len(s) + 2*hexCount
	if required <= len(buf) {
		t = buf[:required]
	} else {
		t = make([]byte, required)
	}

	// Only spaces need rewriting: copy s and patch them in place.
	if hexCount == 0 {
		copy(t, s)
		for i := 0; i < len(s); i++ {
			if s[i] == ' ' {
				t[i] = '+'
			}
		}
		return string(t)
	}

	// General case: j is the write position in t.
	j := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == ' ' && mode == encodeQueryComponent:
			t[j] = '+'
			j++
		case shouldEscape(c, mode):
			t[j] = '%'
			t[j+1] = "0123456789ABCDEF"[c>>4]
			t[j+2] = "0123456789ABCDEF"[c&15]
			j += 3
		default:
			t[j] = s[i]
			j++
		}
	}
	return string(t)
}
   337	
// A URL represents a parsed URL (technically, a URI reference).
//
// The general form represented is:
//
//	[scheme:][//[userinfo@]host][/]path[?query][#fragment]
//
// URLs that do not start with a slash after the scheme are interpreted as:
//
//	scheme:opaque[?query][#fragment]
//
// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
// A consequence is that it is impossible to tell which slashes in the Path were
// slashes in the raw URL and which were %2f. This distinction is rarely important,
// but when it is, the code should use RawPath, an optional field which only gets
// set if the default encoding is different from Path.
//
// URL's String method uses the EscapedPath method to obtain the path. See the
// EscapedPath method for more details.
type URL struct {
	Scheme     string    // scheme without ':' (lower-cased when parsed)
	Opaque     string    // encoded opaque data
	User       *Userinfo // username and password information
	Host       string    // host or host:port
	Path       string    // path (relative paths may omit leading slash)
	RawPath    string    // encoded path hint (see EscapedPath method)
	ForceQuery bool      // append a query ('?') even if RawQuery is empty
	RawQuery   string    // encoded query values, without '?'
	Fragment   string    // fragment for references, without '#'
}
   367	
   368	// User returns a Userinfo containing the provided username
   369	// and no password set.
   370	func User(username string) *Userinfo {
   371		return &Userinfo{username, "", false}
   372	}
   373	
   374	// UserPassword returns a Userinfo containing the provided username
   375	// and password.
   376	//
   377	// This functionality should only be used with legacy web sites.
   378	// RFC 2396 warns that interpreting Userinfo this way
   379	// ``is NOT RECOMMENDED, because the passing of authentication
   380	// information in clear text (such as URI) has proven to be a
   381	// security risk in almost every case where it has been used.''
   382	func UserPassword(username, password string) *Userinfo {
   383		return &Userinfo{username, password, true}
   384	}
   385	
   386	// The Userinfo type is an immutable encapsulation of username and
   387	// password details for a URL. An existing Userinfo value is guaranteed
   388	// to have a username set (potentially empty, as allowed by RFC 2396),
   389	// and optionally a password.
   390	type Userinfo struct {
   391		username    string
   392		password    string
   393		passwordSet bool
   394	}
   395	
   396	// Username returns the username.
   397	func (u *Userinfo) Username() string {
   398		if u == nil {
   399			return ""
   400		}
   401		return u.username
   402	}
   403	
   404	// Password returns the password in case it is set, and whether it is set.
   405	func (u *Userinfo) Password() (string, bool) {
   406		if u == nil {
   407			return "", false
   408		}
   409		return u.password, u.passwordSet
   410	}
   411	
   412	// String returns the encoded userinfo information in the standard form
   413	// of "username[:password]".
   414	func (u *Userinfo) String() string {
   415		if u == nil {
   416			return ""
   417		}
   418		s := escape(u.username, encodeUserPassword)
   419		if u.passwordSet {
   420			s += ":" + escape(u.password, encodeUserPassword)
   421		}
   422		return s
   423	}
   424	
   425	// Maybe rawurl is of the form scheme:path.
   426	// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
   427	// If so, return scheme, path; else return "", rawurl.
   428	func getscheme(rawurl string) (scheme, path string, err error) {
   429		for i := 0; i < len(rawurl); i++ {
   430			c := rawurl[i]
   431			switch {
   432			case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
   433			// do nothing
   434			case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
   435				if i == 0 {
   436					return "", rawurl, nil
   437				}
   438			case c == ':':
   439				if i == 0 {
   440					return "", "", errors.New("missing protocol scheme")
   441				}
   442				return rawurl[:i], rawurl[i+1:], nil
   443			default:
   444				// we have encountered an invalid character,
   445				// so there is no valid scheme
   446				return "", rawurl, nil
   447			}
   448		}
   449		return "", rawurl, nil
   450	}
   451	
   452	// Maybe s is of the form t c u.
   453	// If so, return t, c u (or t, u if cutc == true).
   454	// If not, return s, "".
   455	func split(s string, c string, cutc bool) (string, string) {
   456		i := strings.Index(s, c)
   457		if i < 0 {
   458			return s, ""
   459		}
   460		if cutc {
   461			return s[:i], s[i+len(c):]
   462		}
   463		return s[:i], s[i:]
   464	}
   465	
   466	// Parse parses rawurl into a URL structure.
   467	//
   468	// The rawurl may be relative (a path, without a host) or absolute
   469	// (starting with a scheme). Trying to parse a hostname and path
   470	// without a scheme is invalid but may not necessarily return an
   471	// error, due to parsing ambiguities.
   472	func Parse(rawurl string) (*URL, error) {
   473		// Cut off #frag
   474		u, frag := split(rawurl, "#", true)
   475		url, err := parse(u, false)
   476		if err != nil {
   477			return nil, &Error{"parse", u, err}
   478		}
   479		if frag == "" {
   480			return url, nil
   481		}
   482		if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
   483			return nil, &Error{"parse", rawurl, err}
   484		}
   485		return url, nil
   486	}
   487	
   488	// ParseRequestURI parses rawurl into a URL structure. It assumes that
   489	// rawurl was received in an HTTP request, so the rawurl is interpreted
   490	// only as an absolute URI or an absolute path.
   491	// The string rawurl is assumed not to have a #fragment suffix.
   492	// (Web browsers strip #fragment before sending the URL to a web server.)
   493	func ParseRequestURI(rawurl string) (*URL, error) {
   494		url, err := parse(rawurl, true)
   495		if err != nil {
   496			return nil, &Error{"parse", rawurl, err}
   497		}
   498		return url, nil
   499	}
   500	
// parse parses a URL from a string in one of two contexts. If
// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
// in which case only absolute URLs or path-absolute relative URLs are allowed.
// If viaRequest is false, all forms of relative URLs are allowed.
//
// rawurl must not contain a fragment. The returned errors are not
// wrapped in *Error; the exported callers do that.
func parse(rawurl string, viaRequest bool) (*URL, error) {
	var rest string
	var err error

	if stringContainsCTLByte(rawurl) {
		return nil, errors.New("net/url: invalid control character in URL")
	}

	if rawurl == "" && viaRequest {
		return nil, errors.New("empty url")
	}
	url := new(URL)

	// A lone "*" is stored as the path without further parsing.
	if rawurl == "*" {
		url.Path = "*"
		return url, nil
	}

	// Split off possible leading "http:", "mailto:", etc.
	// Cannot contain escaped characters.
	if url.Scheme, rest, err = getscheme(rawurl); err != nil {
		return nil, err
	}
	url.Scheme = strings.ToLower(url.Scheme)

	// A single trailing '?' denotes an explicitly empty query; remember
	// it via ForceQuery. Otherwise split at the first '?'.
	if strings.HasSuffix(rest, "?") && strings.Count(rest, "?") == 1 {
		url.ForceQuery = true
		rest = rest[:len(rest)-1]
	} else {
		rest, url.RawQuery = split(rest, "?", true)
	}

	if !strings.HasPrefix(rest, "/") {
		if url.Scheme != "" {
			// We consider rootless paths per RFC 3986 as opaque.
			url.Opaque = rest
			return url, nil
		}
		if viaRequest {
			return nil, errors.New("invalid URI for request")
		}

		// Avoid confusion with malformed schemes, like cache_object:foo/bar.
		// See golang.org/issue/16822.
		//
		// RFC 3986, §3.3:
		// In addition, a URI reference (Section 4.1) may be a relative-path reference,
		// in which case the first path segment cannot contain a colon (":") character.
		colon := strings.Index(rest, ":")
		slash := strings.Index(rest, "/")
		if colon >= 0 && (slash < 0 || colon < slash) {
			// First path segment has colon. Not allowed in relative URL.
			return nil, errors.New("first path segment in URL cannot contain colon")
		}
	}

	// An authority is parsed when rest starts with "//" and either a
	// scheme is present, or this is not a request URI and rest does not
	// start with "///".
	if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
		var authority string
		// The authority runs up to, but does not include, the next '/'.
		authority, rest = split(rest[2:], "/", false)
		url.User, url.Host, err = parseAuthority(authority)
		if err != nil {
			return nil, err
		}
	}
	// Set Path and, optionally, RawPath.
	// RawPath is a hint of the encoding of Path. We don't want to set it if
	// the default escaping of Path is equivalent, to help make sure that people
	// don't rely on it in general.
	if err := url.setPath(rest); err != nil {
		return nil, err
	}
	return url, nil
}
   578	
   579	func parseAuthority(authority string) (user *Userinfo, host string, err error) {
   580		i := strings.LastIndex(authority, "@")
   581		if i < 0 {
   582			host, err = parseHost(authority)
   583		} else {
   584			host, err = parseHost(authority[i+1:])
   585		}
   586		if err != nil {
   587			return nil, "", err
   588		}
   589		if i < 0 {
   590			return nil, host, nil
   591		}
   592		userinfo := authority[:i]
   593		if !validUserinfo(userinfo) {
   594			return nil, "", errors.New("net/url: invalid userinfo")
   595		}
   596		if !strings.Contains(userinfo, ":") {
   597			if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
   598				return nil, "", err
   599			}
   600			user = User(userinfo)
   601		} else {
   602			username, password := split(userinfo, ":", true)
   603			if username, err = unescape(username, encodeUserPassword); err != nil {
   604				return nil, "", err
   605			}
   606			if password, err = unescape(password, encodeUserPassword); err != nil {
   607				return nil, "", err
   608			}
   609			user = UserPassword(username, password)
   610		}
   611		return user, host, nil
   612	}
   613	
// parseHost parses host as an authority without user
// information. That is, as host[:port].
//
// It returns the unescaped host (still including any port) or an error
// if the port is malformed, a bracketed literal lacks its ']', or the
// host contains an invalid escape or character.
func parseHost(host string) (string, error) {
	if strings.HasPrefix(host, "[") {
		// Parse an IP-Literal in RFC 3986 and RFC 6874.
		// E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80".
		i := strings.LastIndex(host, "]")
		if i < 0 {
			return "", errors.New("missing ']' in host")
		}
		// Whatever follows the closing bracket must be empty or ":port".
		colonPort := host[i+1:]
		if !validOptionalPort(colonPort) {
			return "", fmt.Errorf("invalid port %q after host", colonPort)
		}

		// RFC 6874 defines that %25 (%-encoded percent) introduces
		// the zone identifier, and the zone identifier can use basically
		// any %-encoding it likes. That's different from the host, which
		// can only %-encode non-ASCII bytes.
		// We do impose some restrictions on the zone, to avoid stupidity
		// like newlines.
		zone := strings.Index(host[:i], "%25")
		if zone >= 0 {
			// Unescape the three pieces separately: the address before
			// the zone, the zone itself, and the "]" plus optional port.
			host1, err := unescape(host[:zone], encodeHost)
			if err != nil {
				return "", err
			}
			host2, err := unescape(host[zone:i], encodeZone)
			if err != nil {
				return "", err
			}
			host3, err := unescape(host[i:], encodeHost)
			if err != nil {
				return "", err
			}
			return host1 + host2 + host3, nil
		}
	} else if i := strings.LastIndex(host, ":"); i != -1 {
		// Non-bracketed host: the text from the last ':' must be a valid port.
		colonPort := host[i:]
		if !validOptionalPort(colonPort) {
			return "", fmt.Errorf("invalid port %q after host", colonPort)
		}
	}

	// No zone identifier: unescape the whole string in host mode.
	var err error
	if host, err = unescape(host, encodeHost); err != nil {
		return "", err
	}
	return host, nil
}
   664	
   665	// setPath sets the Path and RawPath fields of the URL based on the provided
   666	// escaped path p. It maintains the invariant that RawPath is only specified
   667	// when it differs from the default encoding of the path.
   668	// For example:
   669	// - setPath("/foo/bar")   will set Path="/foo/bar" and RawPath=""
   670	// - setPath("/foo%2fbar") will set Path="/foo/bar" and RawPath="/foo%2fbar"
   671	// setPath will return an error only if the provided path contains an invalid
   672	// escaping.
   673	func (u *URL) setPath(p string) error {
   674		path, err := unescape(p, encodePath)
   675		if err != nil {
   676			return err
   677		}
   678		u.Path = path
   679		if escp := escape(path, encodePath); p == escp {
   680			// Default encoding is fine.
   681			u.RawPath = ""
   682		} else {
   683			u.RawPath = p
   684		}
   685		return nil
   686	}
   687	
   688	// EscapedPath returns the escaped form of u.Path.
   689	// In general there are multiple possible escaped forms of any path.
   690	// EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
   691	// Otherwise EscapedPath ignores u.RawPath and computes an escaped
   692	// form on its own.
   693	// The String and RequestURI methods use EscapedPath to construct
   694	// their results.
   695	// In general, code should call EscapedPath instead of
   696	// reading u.RawPath directly.
   697	func (u *URL) EscapedPath() string {
   698		if u.RawPath != "" && validEncodedPath(u.RawPath) {
   699			p, err := unescape(u.RawPath, encodePath)
   700			if err == nil && p == u.Path {
   701				return u.RawPath
   702			}
   703		}
   704		if u.Path == "*" {
   705			return "*" // don't escape (Issue 11202)
   706		}
   707		return escape(u.Path, encodePath)
   708	}
   709	
   710	// validEncodedPath reports whether s is a valid encoded path.
   711	// It must not contain any bytes that require escaping during path encoding.
   712	func validEncodedPath(s string) bool {
   713		for i := 0; i < len(s); i++ {
   714			// RFC 3986, Appendix A.
   715			// pchar = unreserved / pct-encoded / sub-delims / ":" / "@".
   716			// shouldEscape is not quite compliant with the RFC,
   717			// so we check the sub-delims ourselves and let
   718			// shouldEscape handle the others.
   719			switch s[i] {
   720			case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@':
   721				// ok
   722			case '[', ']':
   723				// ok - not specified in RFC 3986 but left alone by modern browsers
   724			case '%':
   725				// ok - percent encoded, will decode
   726			default:
   727				if shouldEscape(s[i], encodePath) {
   728					return false
   729				}
   730			}
   731		}
   732		return true
   733	}
   734	
   735	// validOptionalPort reports whether port is either an empty string
   736	// or matches /^:\d*$/
   737	func validOptionalPort(port string) bool {
   738		if port == "" {
   739			return true
   740		}
   741		if port[0] != ':' {
   742			return false
   743		}
   744		for _, b := range port[1:] {
   745			if b < '0' || b > '9' {
   746				return false
   747			}
   748		}
   749		return true
   750	}
   751	
// String reassembles the URL into a valid URL string.
// The general form of the result is one of:
//
//	scheme:opaque?query#fragment
//	scheme://userinfo@host/path?query#fragment
//
// If u.Opaque is non-empty, String uses the first form;
// otherwise it uses the second form.
// Any non-ASCII characters in host are escaped.
// To obtain the path, String uses u.EscapedPath().
//
// In the second form, the following rules apply:
//	- if u.Scheme is empty, scheme: is omitted.
//	- if u.User is nil, userinfo@ is omitted.
//	- if u.Host is empty, host/ is omitted.
//	- if u.Scheme and u.Host are empty and u.User is nil,
//	   the entire scheme://userinfo@host/ is omitted.
//	- if u.Host is non-empty and u.Path begins with a /,
//	   the form host/path does not add its own /.
//	- if u.RawQuery is empty, ?query is omitted.
//	- if u.Fragment is empty, #fragment is omitted.
func (u *URL) String() string {
	var buf strings.Builder
	if u.Scheme != "" {
		buf.WriteString(u.Scheme)
		buf.WriteByte(':')
	}
	if u.Opaque != "" {
		// Opaque data is already encoded and is written verbatim.
		buf.WriteString(u.Opaque)
	} else {
		if u.Scheme != "" || u.Host != "" || u.User != nil {
			// "//" is written only when something follows it.
			if u.Host != "" || u.Path != "" || u.User != nil {
				buf.WriteString("//")
			}
			if ui := u.User; ui != nil {
				buf.WriteString(ui.String())
				buf.WriteByte('@')
			}
			if h := u.Host; h != "" {
				buf.WriteString(escape(h, encodeHost))
			}
		}
		path := u.EscapedPath()
		// Separate a non-empty host from a path lacking its leading slash.
		if path != "" && path[0] != '/' && u.Host != "" {
			buf.WriteByte('/')
		}
		if buf.Len() == 0 {
			// RFC 3986 §4.2
			// A path segment that contains a colon character (e.g., "this:that")
			// cannot be used as the first segment of a relative-path reference, as
			// it would be mistaken for a scheme name. Such a segment must be
			// preceded by a dot-segment (e.g., "./this:that") to make a relative-
			// path reference.
			if i := strings.IndexByte(path, ':'); i > -1 && strings.IndexByte(path[:i], '/') == -1 {
				buf.WriteString("./")
			}
		}
		buf.WriteString(path)
	}
	// RawQuery is already encoded and is written verbatim.
	if u.ForceQuery || u.RawQuery != "" {
		buf.WriteByte('?')
		buf.WriteString(u.RawQuery)
	}
	// Fragment is held in decoded form and is escaped on output.
	if u.Fragment != "" {
		buf.WriteByte('#')
		buf.WriteString(escape(u.Fragment, encodeFragment))
	}
	return buf.String()
}
   821	
   822	// Values maps a string key to a list of values.
   823	// It is typically used for query parameters and form values.
   824	// Unlike in the http.Header map, the keys in a Values map
   825	// are case-sensitive.
   826	type Values map[string][]string
   827	
   828	// Get gets the first value associated with the given key.
   829	// If there are no values associated with the key, Get returns
   830	// the empty string. To access multiple values, use the map
   831	// directly.
   832	func (v Values) Get(key string) string {
   833		if v == nil {
   834			return ""
   835		}
   836		vs := v[key]
   837		if len(vs) == 0 {
   838			return ""
   839		}
   840		return vs[0]
   841	}
   842	
   843	// Set sets the key to value. It replaces any existing
   844	// values.
   845	func (v Values) Set(key, value string) {
   846		v[key] = []string{value}
   847	}
   848	
   849	// Add adds the value to key. It appends to any existing
   850	// values associated with key.
   851	func (v Values) Add(key, value string) {
   852		v[key] = append(v[key], value)
   853	}
   854	
   855	// Del deletes the values associated with key.
   856	func (v Values) Del(key string) {
   857		delete(v, key)
   858	}
   859	
   860	// ParseQuery parses the URL-encoded query string and returns
   861	// a map listing the values specified for each key.
   862	// ParseQuery always returns a non-nil map containing all the
   863	// valid query parameters found; err describes the first decoding error
   864	// encountered, if any.
   865	//
   866	// Query is expected to be a list of key=value settings separated by
   867	// ampersands or semicolons. A setting without an equals sign is
   868	// interpreted as a key set to an empty value.
   869	func ParseQuery(query string) (Values, error) {
   870		m := make(Values)
   871		err := parseQuery(m, query)
   872		return m, err
   873	}
   874	
   875	func parseQuery(m Values, query string) (err error) {
   876		for query != "" {
   877			key := query
   878			if i := strings.IndexAny(key, "&;"); i >= 0 {
   879				key, query = key[:i], key[i+1:]
   880			} else {
   881				query = ""
   882			}
   883			if key == "" {
   884				continue
   885			}
   886			value := ""
   887			if i := strings.Index(key, "="); i >= 0 {
   888				key, value = key[:i], key[i+1:]
   889			}
   890			key, err1 := QueryUnescape(key)
   891			if err1 != nil {
   892				if err == nil {
   893					err = err1
   894				}
   895				continue
   896			}
   897			value, err1 = QueryUnescape(value)
   898			if err1 != nil {
   899				if err == nil {
   900					err = err1
   901				}
   902				continue
   903			}
   904			m[key] = append(m[key], value)
   905		}
   906		return err
   907	}
   908	
   909	// Encode encodes the values into ``URL encoded'' form
   910	// ("bar=baz&foo=quux") sorted by key.
   911	func (v Values) Encode() string {
   912		if v == nil {
   913			return ""
   914		}
   915		var buf strings.Builder
   916		keys := make([]string, 0, len(v))
   917		for k := range v {
   918			keys = append(keys, k)
   919		}
   920		sort.Strings(keys)
   921		for _, k := range keys {
   922			vs := v[k]
   923			keyEscaped := QueryEscape(k)
   924			for _, v := range vs {
   925				if buf.Len() > 0 {
   926					buf.WriteByte('&')
   927				}
   928				buf.WriteString(keyEscaped)
   929				buf.WriteByte('=')
   930				buf.WriteString(QueryEscape(v))
   931			}
   932		}
   933		return buf.String()
   934	}
   935	
   936	// resolvePath applies special path segments from refs and applies
   937	// them to base, per RFC 3986.
   938	func resolvePath(base, ref string) string {
   939		var full string
   940		if ref == "" {
   941			full = base
   942		} else if ref[0] != '/' {
   943			i := strings.LastIndex(base, "/")
   944			full = base[:i+1] + ref
   945		} else {
   946			full = ref
   947		}
   948		if full == "" {
   949			return ""
   950		}
   951		var dst []string
   952		src := strings.Split(full, "/")
   953		for _, elem := range src {
   954			switch elem {
   955			case ".":
   956				// drop
   957			case "..":
   958				if len(dst) > 0 {
   959					dst = dst[:len(dst)-1]
   960				}
   961			default:
   962				dst = append(dst, elem)
   963			}
   964		}
   965		if last := src[len(src)-1]; last == "." || last == ".." {
   966			// Add final slash to the joined path.
   967			dst = append(dst, "")
   968		}
   969		return "/" + strings.TrimPrefix(strings.Join(dst, "/"), "/")
   970	}
   971	
// IsAbs reports whether the URL is absolute.
// Absolute means that it has a non-empty scheme; no other
// field of u is consulted.
func (u *URL) IsAbs() bool {
	return u.Scheme != ""
}
   977	
   978	// Parse parses a URL in the context of the receiver. The provided URL
   979	// may be relative or absolute. Parse returns nil, err on parse
   980	// failure, otherwise its return value is the same as ResolveReference.
   981	func (u *URL) Parse(ref string) (*URL, error) {
   982		refurl, err := Parse(ref)
   983		if err != nil {
   984			return nil, err
   985		}
   986		return u.ResolveReference(refurl), nil
   987	}
   988	
   989	// ResolveReference resolves a URI reference to an absolute URI from
   990	// an absolute base URI u, per RFC 3986 Section 5.2. The URI reference
   991	// may be relative or absolute. ResolveReference always returns a new
   992	// URL instance, even if the returned URL is identical to either the
   993	// base or reference. If ref is an absolute URL, then ResolveReference
   994	// ignores base and returns a copy of ref.
   995	func (u *URL) ResolveReference(ref *URL) *URL {
   996		url := *ref
   997		if ref.Scheme == "" {
   998			url.Scheme = u.Scheme
   999		}
  1000		if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
  1001			// The "absoluteURI" or "net_path" cases.
  1002			// We can ignore the error from setPath since we know we provided a
  1003			// validly-escaped path.
  1004			url.setPath(resolvePath(ref.EscapedPath(), ""))
  1005			return &url
  1006		}
  1007		if ref.Opaque != "" {
  1008			url.User = nil
  1009			url.Host = ""
  1010			url.Path = ""
  1011			return &url
  1012		}
  1013		if ref.Path == "" && ref.RawQuery == "" {
  1014			url.RawQuery = u.RawQuery
  1015			if ref.Fragment == "" {
  1016				url.Fragment = u.Fragment
  1017			}
  1018		}
  1019		// The "abs_path" or "rel_path" cases.
  1020		url.Host = u.Host
  1021		url.User = u.User
  1022		url.setPath(resolvePath(u.EscapedPath(), ref.EscapedPath()))
  1023		return &url
  1024	}
  1025	
// Query parses RawQuery and returns the corresponding values.
// It silently discards malformed value pairs.
// To check errors use ParseQuery.
// The result is built by ParseQuery on every call.
func (u *URL) Query() Values {
	// The decoding error is dropped on purpose; ParseQuery still returns
	// every pair that decoded cleanly.
	v, _ := ParseQuery(u.RawQuery)
	return v
}
  1033	
  1034	// RequestURI returns the encoded path?query or opaque?query
  1035	// string that would be used in an HTTP request for u.
  1036	func (u *URL) RequestURI() string {
  1037		result := u.Opaque
  1038		if result == "" {
  1039			result = u.EscapedPath()
  1040			if result == "" {
  1041				result = "/"
  1042			}
  1043		} else {
  1044			if strings.HasPrefix(result, "//") {
  1045				result = u.Scheme + ":" + result
  1046			}
  1047		}
  1048		if u.ForceQuery || u.RawQuery != "" {
  1049			result += "?" + u.RawQuery
  1050		}
  1051		return result
  1052	}
  1053	
// Hostname returns u.Host, stripping any valid port number if present.
//
// If the result is enclosed in square brackets, as literal IPv6 addresses are,
// the square brackets are removed from the result.
//
// If u.Host has no valid port, the whole of u.Host (minus any enclosing
// brackets) is returned.
func (u *URL) Hostname() string {
	host, _ := splitHostPort(u.Host)
	return host
}
  1062	
// Port returns the port part of u.Host, without the leading colon.
//
// If u.Host doesn't contain a valid numeric port, Port returns an empty string.
// The split is delegated to splitHostPort.
func (u *URL) Port() string {
	_, port := splitHostPort(u.Host)
	return port
}
  1070	
  1071	// splitHostPort separates host and port. If the port is not valid, it returns
  1072	// the entire input as host, and it doesn't check the validity of the host.
  1073	// Unlike net.SplitHostPort, but per RFC 3986, it requires ports to be numeric.
  1074	func splitHostPort(hostport string) (host, port string) {
  1075		host = hostport
  1076	
  1077		colon := strings.LastIndexByte(host, ':')
  1078		if colon != -1 && validOptionalPort(host[colon:]) {
  1079			host, port = host[:colon], host[colon+1:]
  1080		}
  1081	
  1082		if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
  1083			host = host[1 : len(host)-1]
  1084		}
  1085	
  1086		return
  1087	}
  1088	
  1089	// Marshaling interface implementations.
  1090	// Would like to implement MarshalText/UnmarshalText but that will change the JSON representation of URLs.
  1091	
// MarshalBinary returns the bytes of u.String().
// The returned error is always nil.
func (u *URL) MarshalBinary() (text []byte, err error) {
	return []byte(u.String()), nil
}
  1095	
  1096	func (u *URL) UnmarshalBinary(text []byte) error {
  1097		u1, err := Parse(string(text))
  1098		if err != nil {
  1099			return err
  1100		}
  1101		*u = *u1
  1102		return nil
  1103	}
  1104	
  1105	// validUserinfo reports whether s is a valid userinfo string per RFC 3986
  1106	// Section 3.2.1:
  1107	//     userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
  1108	//     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
  1109	//     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
  1110	//                   / "*" / "+" / "," / ";" / "="
  1111	//
  1112	// It doesn't validate pct-encoded. The caller does that via func unescape.
  1113	func validUserinfo(s string) bool {
  1114		for _, r := range s {
  1115			if 'A' <= r && r <= 'Z' {
  1116				continue
  1117			}
  1118			if 'a' <= r && r <= 'z' {
  1119				continue
  1120			}
  1121			if '0' <= r && r <= '9' {
  1122				continue
  1123			}
  1124			switch r {
  1125			case '-', '.', '_', ':', '~', '!', '$', '&', '\'',
  1126				'(', ')', '*', '+', ',', ';', '=', '%', '@':
  1127				continue
  1128			default:
  1129				return false
  1130			}
  1131		}
  1132		return true
  1133	}
  1134	
  1135	// stringContainsCTLByte reports whether s contains any ASCII control character.
  1136	func stringContainsCTLByte(s string) bool {
  1137		for i := 0; i < len(s); i++ {
  1138			b := s[i]
  1139			if b < ' ' || b == 0x7f {
  1140				return true
  1141			}
  1142		}
  1143		return false
  1144	}
  1145	

View as plain text