...

Source file src/pkg/cmd/go/internal/module/module.go

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package module defines the module.Version type
     6	// along with support code.
     7	package module
     8	
     9	// IMPORTANT NOTE
    10	//
    11	// This file essentially defines the set of valid import paths for the go command.
    12	// There are many subtle considerations, including Unicode ambiguity,
    13	// security, network, and file system representations.
    14	//
    15	// This file also defines the set of valid module path and version combinations,
    16	// another topic with many subtle considerations.
    17	//
    18	// Changes to the semantics in this file require approval from rsc.
    19	
    20	import (
    21		"errors"
    22		"fmt"
    23		"sort"
    24		"strings"
    25		"unicode"
    26		"unicode/utf8"
    27	
    28		"cmd/go/internal/semver"
    29	)
    30	
    31	// A Version identifies a module by its path together with one version of it.
    32	type Version struct {
    33		Path string // module path
    34	
    35		// Version is usually a semantic version in canonical form.
    36		// There are two exceptions to this general rule.
    37		// First, the top-level target of a build has no specific version
    38		// and uses Version = "".
    39		// Second, during MVS calculations the version "none" is used
    40		// to represent the decision to take no version of a given module.
    41		Version string `json:",omitempty"`
    42	}
    43	
    44	// A ModuleError reports an error tied to a specific module, optionally at a specific version.
    45	type ModuleError struct {
    46		Path    string // module path the error is about
    47		Version string // module version; when empty, Error omits the "@version" part
    48		Err     error // underlying cause, returned by Unwrap
    49	}
    50	
    51	// VersionError returns a ModuleError derived from a Version and error.
    52	func VersionError(v Version, err error) error {
    53		return &ModuleError{
    54			Path:    v.Path,
    55			Version: v.Version,
    56			Err:     err,
    57		}
    58	}
    59	
    60	func (e *ModuleError) Error() string {
    61		if v, ok := e.Err.(*InvalidVersionError); ok {
    62			return fmt.Sprintf("%s@%s: invalid %s: %v", e.Path, v.Version, v.noun(), v.Err)
    63		}
    64		if e.Version != "" {
    65			return fmt.Sprintf("%s@%s: %v", e.Path, e.Version, e.Err)
    66		}
    67		return fmt.Sprintf("module %s: %v", e.Path, e.Err)
    68	}
    69	
    70	func (e *ModuleError) Unwrap() error { return e.Err }
    71	
    72	// An InvalidVersionError indicates an error specific to a version, with the
    73	// module path unknown or specified externally.
    74	//
    75	// A ModuleError may wrap an InvalidVersionError, but an InvalidVersionError
    76	// must not wrap a ModuleError.
    77	type InvalidVersionError struct {
    78		Version string // the offending version string
    79		Pseudo  bool // when true, messages say "pseudo-version" instead of "version"
    80		Err     error // underlying cause, returned by Unwrap
    81	}
    82	
    83	// noun returns either "version" or "pseudo-version", depending on whether
    84	// e.Version is a pseudo-version.
    85	func (e *InvalidVersionError) noun() string {
    86		if e.Pseudo {
    87			return "pseudo-version"
    88		}
    89		return "version"
    90	}
    91	
    92	func (e *InvalidVersionError) Error() string {
    93		return fmt.Sprintf("%s %q invalid: %s", e.noun(), e.Version, e.Err)
    94	}
    95	
    96	func (e *InvalidVersionError) Unwrap() error { return e.Err }
    97	
    98	// Check checks that a given module path, version pair is valid.
    99	// In addition to the path being a valid module path
   100	// and the version being a valid semantic version,
   101	// the two must correspond.
   102	// For example, the path "yaml/v2" only corresponds to
   103	// semantic versions beginning with "v2.".
   104	func Check(path, version string) error {
   105		if err := CheckPath(path); err != nil {
   106			return err
   107		}
   108		if !semver.IsValid(version) {
   109			return &ModuleError{
   110				Path: path,
   111				Err:  &InvalidVersionError{Version: version, Err: errors.New("not a semantic version")},
   112			}
   113		}
   114		_, pathMajor, _ := SplitPathVersion(path)
   115		if err := MatchPathMajor(version, pathMajor); err != nil {
   116			return &ModuleError{Path: path, Err: err}
   117		}
   118		return nil
   119	}
   120	
   121	// firstPathOK reports whether r can appear in the first element of a module path.
   122	// The first element of the path must be an LDH domain name, at least for now.
   123	// To avoid case ambiguity, the domain name must be entirely lower case.
   124	func firstPathOK(r rune) bool {
   125		return r == '-' || r == '.' ||
   126			'0' <= r && r <= '9' ||
   127			'a' <= r && r <= 'z'
   128	}
   129	
   130	// pathOK reports whether r can appear in an import path element.
   131	// Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: + - . _ and ~.
   132	// This matches what "go get" has historically recognized in import paths.
   133	// TODO(rsc): We would like to allow Unicode letters, but that requires additional
   134	// care in the safe encoding (see note below).
   135	func pathOK(r rune) bool {
   136		if r < utf8.RuneSelf {
   137			return r == '+' || r == '-' || r == '.' || r == '_' || r == '~' ||
   138				'0' <= r && r <= '9' ||
   139				'A' <= r && r <= 'Z' ||
   140				'a' <= r && r <= 'z'
   141		}
   142		return false
   143	}
   144	
   145	// fileNameOK reports whether r can appear in a file name.
   146	// For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
   147	// If we expand the set of allowed characters here, we have to
   148	// work harder at detecting potential case-folding and normalization collisions.
   149	// See note about "safe encoding" below.
   150	func fileNameOK(r rune) bool {
   151		if r < utf8.RuneSelf {
   152			// Entire set of ASCII punctuation, from which we remove characters:
   153			//     ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
   154			// We disallow some shell special characters: " ' * < > ? ` |
   155			// (Note that some of those are disallowed by the Windows file system as well.)
   156			// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
   157			// We allow spaces (U+0020) in file names.
   158			const allowed = "!#$%&()+,-.=@[]^_{}~ "
   159			if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
   160				return true
   161			}
   162			for i := 0; i < len(allowed); i++ {
   163				if rune(allowed[i]) == r {
   164					return true
   165				}
   166			}
   167			return false
   168		}
   169		// It may be OK to add more ASCII punctuation here, but only carefully.
   170		// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
   171		return unicode.IsLetter(r)
   172	}
   173	
   174	// CheckPath checks that a module path is valid.
   175	func CheckPath(path string) error {
   176		if err := checkPath(path, false); err != nil {
   177			return fmt.Errorf("malformed module path %q: %v", path, err)
   178		}
   179		i := strings.Index(path, "/")
   180		if i < 0 {
   181			i = len(path)
   182		}
   183		if i == 0 {
   184			return fmt.Errorf("malformed module path %q: leading slash", path)
   185		}
   186		if !strings.Contains(path[:i], ".") {
   187			return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
   188		}
   189		if path[0] == '-' {
   190			return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
   191		}
   192		for _, r := range path[:i] {
   193			if !firstPathOK(r) {
   194				return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
   195			}
   196		}
   197		if _, _, ok := SplitPathVersion(path); !ok {
   198			return fmt.Errorf("malformed module path %q: invalid version", path)
   199		}
   200		return nil
   201	}
   202	
   203	// CheckImportPath checks that an import path is valid.
   204	func CheckImportPath(path string) error {
   205		if err := checkPath(path, false); err != nil {
   206			return fmt.Errorf("malformed import path %q: %v", path, err)
   207		}
   208		return nil
   209	}
   210	
   211	// checkPath checks that a general path is valid.
   212	// It returns an error describing why but not mentioning path.
   213	// Because these checks apply to both module paths and import paths,
   214	// the caller is expected to add the "malformed ___ path %q: " prefix.
   215	// fileName indicates whether the final element of the path is a file name
   216	// (as opposed to a directory name).
   217	func checkPath(path string, fileName bool) error {
   218		if !utf8.ValidString(path) {
   219			return fmt.Errorf("invalid UTF-8")
   220		}
   221		if path == "" {
   222			return fmt.Errorf("empty string")
   223		}
   224		if path[0] == '-' {
   225			return fmt.Errorf("leading dash")
   226		}
   227		if strings.Contains(path, "..") {
   228			return fmt.Errorf("double dot")
   229		}
   230		if strings.Contains(path, "//") {
   231			return fmt.Errorf("double slash")
   232		}
   233		if path[len(path)-1] == '/' {
   234			return fmt.Errorf("trailing slash")
   235		}
   236		elemStart := 0
   237		for i, r := range path {
   238			if r == '/' {
   239				if err := checkElem(path[elemStart:i], fileName); err != nil {
   240					return err
   241				}
   242				elemStart = i + 1
   243			}
   244		}
   245		if err := checkElem(path[elemStart:], fileName); err != nil {
   246			return err
   247		}
   248		return nil
   249	}
   250	
   251	// checkElem checks whether an individual path element is valid.
   252	// fileName indicates whether the element is a file name (not a directory name).
   253	func checkElem(elem string, fileName bool) error {
   254		if elem == "" {
   255			return fmt.Errorf("empty path element")
   256		}
   257		if strings.Count(elem, ".") == len(elem) {
   258			return fmt.Errorf("invalid path element %q", elem)
   259		}
   260		if elem[0] == '.' && !fileName {
   261			return fmt.Errorf("leading dot in path element")
   262		}
   263		if elem[len(elem)-1] == '.' {
   264			return fmt.Errorf("trailing dot in path element")
   265		}
   266		charOK := pathOK
   267		if fileName {
   268			charOK = fileNameOK
   269		}
   270		for _, r := range elem {
   271			if !charOK(r) {
   272				return fmt.Errorf("invalid char %q", r)
   273			}
   274		}
   275	
   276		// Windows disallows a bunch of path elements, sadly.
   277		// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   278		short := elem
   279		if i := strings.Index(short, "."); i >= 0 {
   280			short = short[:i]
   281		}
   282		for _, bad := range badWindowsNames {
   283			if strings.EqualFold(bad, short) {
   284				return fmt.Errorf("%q disallowed as path element component on Windows", short)
   285			}
   286		}
   287		return nil
   288	}
   289	
   290	// CheckFilePath checks whether a slash-separated file path is valid.
   291	func CheckFilePath(path string) error {
   292		if err := checkPath(path, true); err != nil {
   293			return fmt.Errorf("malformed file path %q: %v", path, err)
   294		}
   295		return nil
   296	}
   297	
   298	// badWindowsNames are the reserved file path elements on Windows; checkElem rejects them, ignoring case.
   299	// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   300	var badWindowsNames = []string{
   301		"CON",
   302		"PRN",
   303		"AUX",
   304		"NUL",
   305		"COM1",
   306		"COM2",
   307		"COM3",
   308		"COM4",
   309		"COM5",
   310		"COM6",
   311		"COM7",
   312		"COM8",
   313		"COM9",
   314		"LPT1",
   315		"LPT2",
   316		"LPT3",
   317		"LPT4",
   318		"LPT5",
   319		"LPT6",
   320		"LPT7",
   321		"LPT8",
   322		"LPT9",
   323	}
   324	
   325	// SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
   326	// and version is either empty or "/vN" for N >= 2.
   327	// As a special case, gopkg.in paths are recognized directly;
   328	// they require ".vN" instead of "/vN", and for all N, not just N >= 2.
   329	func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) {
   330		if strings.HasPrefix(path, "gopkg.in/") {
   331			return splitGopkgIn(path)
   332		}
   333	
   334		i := len(path)
   335		dot := false
   336		for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') {
   337			if path[i-1] == '.' {
   338				dot = true
   339			}
   340			i--
   341		}
   342		if i <= 1 || i == len(path) || path[i-1] != 'v' || path[i-2] != '/' {
   343			return path, "", true
   344		}
   345		prefix, pathMajor = path[:i-2], path[i-2:]
   346		if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" {
   347			return path, "", false
   348		}
   349		return prefix, pathMajor, true
   350	}
   351	
   352	// splitGopkgIn is like SplitPathVersion but only for gopkg.in paths.
   353	func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) {
   354		if !strings.HasPrefix(path, "gopkg.in/") {
   355			return path, "", false
   356		}
   357		i := len(path)
   358		if strings.HasSuffix(path, "-unstable") {
   359			i -= len("-unstable")
   360		}
   361		for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9') {
   362			i--
   363		}
   364		if i <= 1 || path[i-1] != 'v' || path[i-2] != '.' {
   365			// All gopkg.in paths must end in vN for some N.
   366			return path, "", false
   367		}
   368		prefix, pathMajor = path[:i-2], path[i-2:]
   369		if len(pathMajor) <= 2 || pathMajor[2] == '0' && pathMajor != ".v0" {
   370			return path, "", false
   371		}
   372		return prefix, pathMajor, true
   373	}
   374	
   375	// MatchPathMajor returns a non-nil error if the semantic version v
   376	// does not match the path major version pathMajor.
   377	func MatchPathMajor(v, pathMajor string) error {
   378		if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
   379			pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
   380		}
   381		if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" {
   382			// Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1.
   383			// For example, gopkg.in/yaml.v2@v2.2.1's go.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405.
   384			return nil
   385		}
   386		m := semver.Major(v)
   387		if pathMajor == "" {
   388			if m == "v0" || m == "v1" || semver.Build(v) == "+incompatible" {
   389				return nil
   390			}
   391			pathMajor = "v0 or v1"
   392		} else if pathMajor[0] == '/' || pathMajor[0] == '.' {
   393			if m == pathMajor[1:] {
   394				return nil
   395			}
   396			pathMajor = pathMajor[1:]
   397		}
   398		return &InvalidVersionError{
   399			Version: v,
   400			Err:     fmt.Errorf("should be %s, not %s", pathMajor, semver.Major(v)),
   401		}
   402	}
   403	
   404	// PathMajorPrefix returns the major-version tag prefix implied by pathMajor.
   405	// An empty PathMajorPrefix allows either v0 or v1.
   406	//
   407	// Note that MatchPathMajor may accept some versions that do not actually begin
   408	// with this prefix: namely, it accepts a 'v0.0.0-' prefix for a '.v1'
   409	// pathMajor, even though that pathMajor implies 'v1' tagging.
   410	func PathMajorPrefix(pathMajor string) string {
   411		if pathMajor == "" {
   412			return ""
   413		}
   414		if pathMajor[0] != '/' && pathMajor[0] != '.' {
   415			panic("pathMajor suffix " + pathMajor + " passed to PathMajorPrefix lacks separator")
   416		}
   417		if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
   418			pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
   419		}
   420		m := pathMajor[1:]
   421		if m != semver.Major(m) {
   422			panic("pathMajor suffix " + pathMajor + "passed to PathMajorPrefix is not a valid major version")
   423		}
   424		return m
   425	}
   426	
   427	// CanonicalVersion returns the canonical form of the version string v.
   428	// It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible".
   429	func CanonicalVersion(v string) string {
   430		cv := semver.Canonical(v)
   431		if semver.Build(v) == "+incompatible" {
   432			cv += "+incompatible"
   433		}
   434		return cv
   435	}
   436	
   437	// Sort sorts the list by Path, breaking ties by comparing Versions.
   438	func Sort(list []Version) {
   439		sort.Slice(list, func(i, j int) bool {
   440			mi := list[i]
   441			mj := list[j]
   442			if mi.Path != mj.Path {
   443				return mi.Path < mj.Path
   444			}
   445			// To help go.sum formatting, allow version/file.
   446			// Compare semver prefix by semver rules,
   447			// file by string order.
   448			vi := mi.Version
   449			vj := mj.Version
   450			var fi, fj string
   451			if k := strings.Index(vi, "/"); k >= 0 {
   452				vi, fi = vi[:k], vi[k:]
   453			}
   454			if k := strings.Index(vj, "/"); k >= 0 {
   455				vj, fj = vj[:k], vj[k:]
   456			}
   457			if vi != vj {
   458				return semver.Compare(vi, vj) < 0
   459			}
   460			return fi < fj
   461		})
   462	}
   463	
   464	// Safe encodings
   465	//
   466	// Module paths appear as substrings of file system paths
   467	// (in the download cache) and of web server URLs in the proxy protocol.
   468	// In general we cannot rely on file systems to be case-sensitive,
   469	// nor can we rely on web servers, since they read from file systems.
   470	// That is, we cannot rely on the file system to keep rsc.io/QUOTE
   471	// and rsc.io/quote separate. Windows and macOS don't.
   472	// Instead, we must never require two different casings of a file path.
   473	// Because we want the download cache to match the proxy protocol,
   474	// and because we want the proxy protocol to be possible to serve
   475	// from a tree of static files (which might be stored on a case-insensitive
   476	// file system), the proxy protocol must never require two different casings
   477	// of a URL path either.
   478	//
   479	// One possibility would be to make the safe encoding be the lowercase
   480	// hexadecimal encoding of the actual path bytes. This would avoid ever
   481	// needing different casings of a file path, but it would be fairly illegible
   482	// to most programmers when those paths appeared in the file system
   483	// (including in file paths in compiler errors and stack traces)
   484	// in web server logs, and so on. Instead, we want a safe encoding that
   485	// leaves most paths unaltered.
   486	//
   487	// The safe encoding is this:
   488	// replace every uppercase letter with an exclamation mark
   489	// followed by the letter's lowercase equivalent.
   490	//
   491	// For example,
   492	//	github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go
   493	//	github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
   494	//	github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
   495	//
   496	// Import paths that avoid upper-case letters are left unchanged.
   497	// Note that because import paths are ASCII-only and avoid various
   498	// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
   499	// and avoids the same problematic punctuation.
   500	//
   501	// Import paths have never allowed exclamation marks, so there is no
   502	// need to define how to encode a literal !.
   503	//
   504	// Although paths are disallowed from using Unicode (see pathOK above),
   505	// the eventual plan is to allow Unicode letters as well, to assume that
   506	// file systems and URLs are Unicode-safe (storing UTF-8), and apply
   507	// the !-for-uppercase convention. Note however that not all runes that
   508	// are different but case-fold equivalent are an upper/lower pair.
   509	// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
   510	// are considered to case-fold to each other. When we do add Unicode
   511	// letters, we must not assume that upper/lower are the only case-equivalent pairs.
   512	// Perhaps the Kelvin symbol would be disallowed entirely, for example.
   513	// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
   514	//
   515	// Also, it would be nice to allow Unicode marks as well as letters,
   516	// but marks include combining marks, and then we must deal not
   517	// only with case folding but also normalization: both U+00E9 ('é')
   518	// and U+0065 U+0301 ('e' followed by combining acute accent)
   519	// look the same on the page and are treated by some file systems
   520	// as the same path. If we do allow Unicode marks in paths, there
   521	// must be some kind of normalization to allow only one canonical
   522	// encoding of any character used in an import path.
   523	
   524	// EncodePath returns the safe encoding of the given module path.
   525	// It fails if the module path is invalid.
   526	func EncodePath(path string) (encoding string, err error) {
   527		if err := CheckPath(path); err != nil {
   528			return "", err
   529		}
   530	
   531		return encodeString(path)
   532	}
   533	
   534	// EncodeVersion returns the safe encoding of the given module version.
   535	// Versions are allowed to be in non-semver form but must be valid file names
   536	// and not contain exclamation marks.
   537	func EncodeVersion(v string) (encoding string, err error) {
   538		if err := checkElem(v, true); err != nil || strings.Contains(v, "!") {
   539			return "", &InvalidVersionError{
   540				Version: v,
   541				Err:     fmt.Errorf("disallowed version string"),
   542			}
   543		}
   544		return encodeString(v)
   545	}
   546	
   547	func encodeString(s string) (encoding string, err error) {
   548		haveUpper := false
   549		for _, r := range s {
   550			if r == '!' || r >= utf8.RuneSelf {
   551				// This should be disallowed by CheckPath, but diagnose anyway.
   552				// The correctness of the encoding loop below depends on it.
   553				return "", fmt.Errorf("internal error: inconsistency in EncodePath")
   554			}
   555			if 'A' <= r && r <= 'Z' {
   556				haveUpper = true
   557			}
   558		}
   559	
   560		if !haveUpper {
   561			return s, nil
   562		}
   563	
   564		var buf []byte
   565		for _, r := range s {
   566			if 'A' <= r && r <= 'Z' {
   567				buf = append(buf, '!', byte(r+'a'-'A'))
   568			} else {
   569				buf = append(buf, byte(r))
   570			}
   571		}
   572		return string(buf), nil
   573	}
   574	
   575	// DecodePath returns the module path of the given safe encoding.
   576	// It fails if the encoding is invalid or encodes an invalid path.
   577	func DecodePath(encoding string) (path string, err error) {
   578		path, ok := decodeString(encoding)
   579		if !ok {
   580			return "", fmt.Errorf("invalid module path encoding %q", encoding)
   581		}
   582		if err := CheckPath(path); err != nil {
   583			return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
   584		}
   585		return path, nil
   586	}
   587	
   588	// DecodeVersion returns the version string for the given safe encoding.
   589	// It fails if the encoding is invalid or encodes an invalid version.
   590	// Versions are allowed to be in non-semver form but must be valid file names
   591	// and not contain exclamation marks.
   592	func DecodeVersion(encoding string) (v string, err error) {
   593		v, ok := decodeString(encoding)
   594		if !ok {
   595			return "", fmt.Errorf("invalid version encoding %q", encoding)
   596		}
   597		if err := checkElem(v, true); err != nil {
   598			return "", fmt.Errorf("disallowed version string %q", v)
   599		}
   600		return v, nil
   601	}
   602	
   603	func decodeString(encoding string) (string, bool) {
   604		var buf []byte
   605	
   606		bang := false
   607		for _, r := range encoding {
   608			if r >= utf8.RuneSelf {
   609				return "", false
   610			}
   611			if bang {
   612				bang = false
   613				if r < 'a' || 'z' < r {
   614					return "", false
   615				}
   616				buf = append(buf, byte(r+'A'-'a'))
   617				continue
   618			}
   619			if r == '!' {
   620				bang = true
   621				continue
   622			}
   623			if 'A' <= r && r <= 'Z' {
   624				return "", false
   625			}
   626			buf = append(buf, byte(r))
   627		}
   628		if bang {
   629			return "", false
   630		}
   631		return string(buf), true
   632	}
   633	

View as plain text