...

Source file src/pkg/cmd/go/internal/get/vcs.go

     1	// Copyright 2012 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package get
     6	
     7	import (
     8		"encoding/json"
     9		"errors"
    10		"fmt"
    11		"internal/lazyregexp"
    12		"internal/singleflight"
    13		"log"
    14		urlpkg "net/url"
    15		"os"
    16		"os/exec"
    17		"path/filepath"
    18		"regexp"
    19		"strings"
    20		"sync"
    21	
    22		"cmd/go/internal/base"
    23		"cmd/go/internal/cfg"
    24		"cmd/go/internal/web"
    25	)
    26	
// A vcsCmd describes how to use a version control system
// like Mercurial, Git, or Subversion.
//
// The command strings below are templates: run1 splits each one on
// whitespace and then expands {key} placeholders (for example {repo},
// {dir}, {tag} and {scheme}) inside every resulting argument.
type vcsCmd struct {
	name string // human-readable name; returned by String
	cmd  string // name of binary to invoke command

	createCmd   []string // commands to download a fresh copy of a repository
	downloadCmd []string // commands to download updates into an existing repository

	tagCmd         []tagCmd // commands to list tags
	tagLookupCmd   []tagCmd // commands to lookup tags before running tagSyncCmd
	tagSyncCmd     []string // commands to sync to specific tag
	tagSyncDefault []string // commands to sync to default tag

	scheme  []string // URL schemes supported for this system, tried in this order when pinging
	pingCmd string   // command used by ping to probe {scheme}://{repo}; may be empty

	// remoteRepo reports the remote repository recorded in the checkout at rootDir.
	remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error)
	// resolveRepo maps remoteRepo to the repository it actually refers to.
	// It is optional; in this file only Bazaar sets it.
	resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error)
}
    47	
// defaultSecureScheme is the set of URL schemes that isSecureScheme treats
// as secure when no VCS-specific override (such as GIT_ALLOW_PROTOCOL for
// git) applies. Schemes that are absent from the map are insecure.
var defaultSecureScheme = map[string]bool{
	"https":   true,
	"git+ssh": true,
	"bzr+ssh": true,
	"svn+ssh": true,
	"ssh":     true,
}
    55	
    56	func (v *vcsCmd) isSecure(repo string) bool {
    57		u, err := urlpkg.Parse(repo)
    58		if err != nil {
    59			// If repo is not a URL, it's not secure.
    60			return false
    61		}
    62		return v.isSecureScheme(u.Scheme)
    63	}
    64	
    65	func (v *vcsCmd) isSecureScheme(scheme string) bool {
    66		switch v.cmd {
    67		case "git":
    68			// GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a
    69			// colon-separated list of schemes that are allowed to be used with git
    70			// fetch/clone. Any scheme not mentioned will be considered insecure.
    71			if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" {
    72				for _, s := range strings.Split(allow, ":") {
    73					if s == scheme {
    74						return true
    75					}
    76				}
    77				return false
    78			}
    79		}
    80		return defaultSecureScheme[scheme]
    81	}
    82	
// A tagCmd describes a command to list available tags
// that can be passed to tagSyncCmd.
//
// pattern is compiled with a `(?m-s)` prefix by tags and tagSync, and its
// first capture group is taken as the tag name.
type tagCmd struct {
	cmd     string // command to list tags
	pattern string // regexp to extract tags from list
}
    89	
// vcsList lists the known version control systems.
// vcsByCmd, vcsFromDir and checkNestedVCS all iterate over it in this order.
var vcsList = []*vcsCmd{
	vcsHg,
	vcsGit,
	vcsSvn,
	vcsBzr,
	vcsFossil,
}
    98	
    99	// vcsByCmd returns the version control system for the given
   100	// command name (hg, git, svn, bzr).
   101	func vcsByCmd(cmd string) *vcsCmd {
   102		for _, vcs := range vcsList {
   103			if vcs.cmd == cmd {
   104				return vcs
   105			}
   106		}
   107		return nil
   108	}
   109	
// vcsHg describes how to use Mercurial.
var vcsHg = &vcsCmd{
	name: "Mercurial",
	cmd:  "hg",

	createCmd:   []string{"clone -U -- {repo} {dir}"},
	downloadCmd: []string{"pull"},

	// We allow both tag and branch names as 'tags'
	// for selecting a version. This lets people have
	// a go.release.r60 branch and a go1 branch
	// and make changes in both, without constantly
	// editing .hgtags.
	tagCmd: []tagCmd{
		// Each pattern captures the first whitespace-delimited word of a line.
		{"tags", `^(\S+)`},
		{"branches", `^(\S+)`},
	},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update default"},

	scheme:     []string{"https", "http", "ssh"},
	pingCmd:    "identify -- {scheme}://{repo}",
	remoteRepo: hgRemoteRepo,
}
   134	
   135	func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) {
   136		out, err := vcsHg.runOutput(rootDir, "paths default")
   137		if err != nil {
   138			return "", err
   139		}
   140		return strings.TrimSpace(string(out)), nil
   141	}
   142	
// vcsGit describes how to use Git.
var vcsGit = &vcsCmd{
	name: "Git",
	cmd:  "git",

	// The second create command starts with the -go-internal-cd pseudo-command,
	// which run1 interprets as "run the rest inside {dir}".
	createCmd:   []string{"clone -- {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"},
	downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"},

	tagCmd: []tagCmd{
		// tags/xxx matches a git tag named xxx
		// origin/xxx matches a git branch named xxx on the default remote repository
		{"show-ref", `(?:tags|origin)/(\S+)$`},
	},
	// tagSync replaces the requested tag with the full "tags/xxx" or
	// "origin/xxx" reference captured here before running tagSyncCmd.
	tagLookupCmd: []tagCmd{
		{"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`},
	},
	tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"},
	// both createCmd and downloadCmd update the working dir.
	// No need to do more here. We used to 'checkout master'
	// but that doesn't work if the default branch is not named master.
	// DO NOT add 'checkout master' here.
	// See golang.org/issue/9032.
	tagSyncDefault: []string{"submodule update --init --recursive"},

	scheme: []string{"git", "https", "http", "git+ssh", "ssh"},

	// Leave out the '--' separator in the ls-remote command: git 2.7.4 does not
	// support such a separator for that command, and this use should be safe
	// without it because the {scheme} value comes from the predefined list above.
	// See golang.org/issue/33836.
	pingCmd: "ls-remote {scheme}://{repo}",

	remoteRepo: gitRemoteRepo,
}
   177	
// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH.
// Its three capture groups are the user, the host and the path, in that order.
var scpSyntaxRe = lazyregexp.New(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
   181	
   182	func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) {
   183		cmd := "config remote.origin.url"
   184		errParse := errors.New("unable to parse output of git " + cmd)
   185		errRemoteOriginNotFound := errors.New("remote origin not found")
   186		outb, err := vcsGit.run1(rootDir, cmd, nil, false)
   187		if err != nil {
   188			// if it doesn't output any message, it means the config argument is correct,
   189			// but the config value itself doesn't exist
   190			if outb != nil && len(outb) == 0 {
   191				return "", errRemoteOriginNotFound
   192			}
   193			return "", err
   194		}
   195		out := strings.TrimSpace(string(outb))
   196	
   197		var repoURL *urlpkg.URL
   198		if m := scpSyntaxRe.FindStringSubmatch(out); m != nil {
   199			// Match SCP-like syntax and convert it to a URL.
   200			// Eg, "git@github.com:user/repo" becomes
   201			// "ssh://git@github.com/user/repo".
   202			repoURL = &urlpkg.URL{
   203				Scheme: "ssh",
   204				User:   urlpkg.User(m[1]),
   205				Host:   m[2],
   206				Path:   m[3],
   207			}
   208		} else {
   209			repoURL, err = urlpkg.Parse(out)
   210			if err != nil {
   211				return "", err
   212			}
   213		}
   214	
   215		// Iterate over insecure schemes too, because this function simply
   216		// reports the state of the repo. If we can't see insecure schemes then
   217		// we can't report the actual repo URL.
   218		for _, s := range vcsGit.scheme {
   219			if repoURL.Scheme == s {
   220				return repoURL.String(), nil
   221			}
   222		}
   223		return "", errParse
   224	}
   225	
// vcsBzr describes how to use Bazaar.
var vcsBzr = &vcsCmd{
	name: "Bazaar",
	cmd:  "bzr",

	createCmd: []string{"branch -- {repo} {dir}"},

	// Without --overwrite bzr will not pull tags that changed.
	// Replace by --overwrite-tags after http://pad.lv/681792 goes in.
	downloadCmd: []string{"pull --overwrite"},

	// The tag pattern captures the first whitespace-delimited word of each line.
	tagCmd:         []tagCmd{{"tags", `^(\S+)`}},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update -r revno:-1"},

	scheme:      []string{"https", "http", "bzr", "bzr+ssh"},
	pingCmd:     "info -- {scheme}://{repo}",
	remoteRepo:  bzrRemoteRepo,
	resolveRepo: bzrResolveRepo,
}
   246	
   247	func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) {
   248		outb, err := vcsBzr.runOutput(rootDir, "config parent_location")
   249		if err != nil {
   250			return "", err
   251		}
   252		return strings.TrimSpace(string(outb)), nil
   253	}
   254	
   255	func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) {
   256		outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo)
   257		if err != nil {
   258			return "", err
   259		}
   260		out := string(outb)
   261	
   262		// Expect:
   263		// ...
   264		//   (branch root|repository branch): <URL>
   265		// ...
   266	
   267		found := false
   268		for _, prefix := range []string{"\n  branch root: ", "\n  repository branch: "} {
   269			i := strings.Index(out, prefix)
   270			if i >= 0 {
   271				out = out[i+len(prefix):]
   272				found = true
   273				break
   274			}
   275		}
   276		if !found {
   277			return "", fmt.Errorf("unable to parse output of bzr info")
   278		}
   279	
   280		i := strings.Index(out, "\n")
   281		if i < 0 {
   282			return "", fmt.Errorf("unable to parse output of bzr info")
   283		}
   284		out = out[:i]
   285		return strings.TrimSpace(out), nil
   286	}
   287	
// vcsSvn describes how to use Subversion.
var vcsSvn = &vcsCmd{
	name: "Subversion",
	cmd:  "svn",

	createCmd:   []string{"checkout -- {repo} {dir}"},
	downloadCmd: []string{"update"},

	// There is no tag command in subversion.
	// The branch information is all in the path names.
	// Because tagSyncCmd is therefore left nil, tagSync is a no-op for svn.

	scheme:     []string{"https", "http", "svn", "svn+ssh"},
	pingCmd:    "info -- {scheme}://{repo}",
	remoteRepo: svnRemoteRepo,
}
   303	
   304	func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) {
   305		outb, err := vcsSvn.runOutput(rootDir, "info")
   306		if err != nil {
   307			return "", err
   308		}
   309		out := string(outb)
   310	
   311		// Expect:
   312		//
   313		//	 ...
   314		// 	URL: <URL>
   315		// 	...
   316		//
   317		// Note that we're not using the Repository Root line,
   318		// because svn allows checking out subtrees.
   319		// The URL will be the URL of the subtree (what we used with 'svn co')
   320		// while the Repository Root may be a much higher parent.
   321		i := strings.Index(out, "\nURL: ")
   322		if i < 0 {
   323			return "", fmt.Errorf("unable to parse output of svn info")
   324		}
   325		out = out[i+len("\nURL: "):]
   326		i = strings.Index(out, "\n")
   327		if i < 0 {
   328			return "", fmt.Errorf("unable to parse output of svn info")
   329		}
   330		out = out[:i]
   331		return strings.TrimSpace(out), nil
   332	}
   333	
   334	// fossilRepoName is the name go get associates with a fossil repository. In the
   335	// real world the file can be named anything.
   336	const fossilRepoName = ".fossil"
   337	
   338	// vcsFossil describes how to use Fossil (fossil-scm.org)
   339	var vcsFossil = &vcsCmd{
   340		name: "Fossil",
   341		cmd:  "fossil",
   342	
   343		createCmd:   []string{"-go-internal-mkdir {dir} clone -- {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"},
   344		downloadCmd: []string{"up"},
   345	
   346		tagCmd:         []tagCmd{{"tag ls", `(.*)`}},
   347		tagSyncCmd:     []string{"up tag:{tag}"},
   348		tagSyncDefault: []string{"up trunk"},
   349	
   350		scheme:     []string{"https", "http"},
   351		remoteRepo: fossilRemoteRepo,
   352	}
   353	
   354	func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) {
   355		out, err := vcsFossil.runOutput(rootDir, "remote-url")
   356		if err != nil {
   357			return "", err
   358		}
   359		return strings.TrimSpace(string(out)), nil
   360	}
   361	
// String returns the human-readable name of the version control system
// (for example "Git"), not the name of its binary.
func (v *vcsCmd) String() string {
	return v.name
}
   365	
// run runs the command line cmd in the given directory.
// keyval is a list of key, value pairs. run expands
// instances of {key} in cmd into value, but only after
// splitting cmd into individual arguments.
// If an error occurs, run prints the command line and the
// command's captured standard error (or, when there is none,
// the error text) to standard error.
// In all cases run discards the command's standard output.
func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error {
	_, err := v.run1(dir, cmd, keyval, true)
	return err
}
   377	
// runVerboseOnly is like run but only generates error output to standard error in verbose mode.
// It does so by passing verbose=false to run1, which then reports failures
// only when cfg.BuildV is set.
func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error {
	_, err := v.run1(dir, cmd, keyval, false)
	return err
}
   383	
// runOutput is like run but returns the output of the command.
// The returned bytes are the command's standard output as collected by run1;
// failures are still reported on standard error exactly as in run.
func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) {
	return v.run1(dir, cmd, keyval, true)
}
   388	
   389	// run1 is the generalized implementation of run and runOutput.
   390	func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) {
   391		m := make(map[string]string)
   392		for i := 0; i < len(keyval); i += 2 {
   393			m[keyval[i]] = keyval[i+1]
   394		}
   395		args := strings.Fields(cmdline)
   396		for i, arg := range args {
   397			args[i] = expand(m, arg)
   398		}
   399	
   400		if len(args) >= 2 && args[0] == "-go-internal-mkdir" {
   401			var err error
   402			if filepath.IsAbs(args[1]) {
   403				err = os.Mkdir(args[1], os.ModePerm)
   404			} else {
   405				err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm)
   406			}
   407			if err != nil {
   408				return nil, err
   409			}
   410			args = args[2:]
   411		}
   412	
   413		if len(args) >= 2 && args[0] == "-go-internal-cd" {
   414			if filepath.IsAbs(args[1]) {
   415				dir = args[1]
   416			} else {
   417				dir = filepath.Join(dir, args[1])
   418			}
   419			args = args[2:]
   420		}
   421	
   422		_, err := exec.LookPath(v.cmd)
   423		if err != nil {
   424			fmt.Fprintf(os.Stderr,
   425				"go: missing %s command. See https://golang.org/s/gogetcmd\n",
   426				v.name)
   427			return nil, err
   428		}
   429	
   430		cmd := exec.Command(v.cmd, args...)
   431		cmd.Dir = dir
   432		cmd.Env = base.EnvForDir(cmd.Dir, os.Environ())
   433		if cfg.BuildX {
   434			fmt.Fprintf(os.Stderr, "cd %s\n", dir)
   435			fmt.Fprintf(os.Stderr, "%s %s\n", v.cmd, strings.Join(args, " "))
   436		}
   437		out, err := cmd.Output()
   438		if err != nil {
   439			if verbose || cfg.BuildV {
   440				fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " "))
   441				if ee, ok := err.(*exec.ExitError); ok && len(ee.Stderr) > 0 {
   442					os.Stderr.Write(ee.Stderr)
   443				} else {
   444					fmt.Fprintf(os.Stderr, err.Error())
   445				}
   446			}
   447		}
   448		return out, err
   449	}
   450	
// ping pings to determine scheme to use.
// It runs v.pingCmd from the current directory with {scheme} and {repo}
// expanded, and returns nil if the command succeeds. Failures are reported
// on standard error only in verbose mode (see runVerboseOnly).
func (v *vcsCmd) ping(scheme, repo string) error {
	return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo)
}
   455	
   456	// create creates a new copy of repo in dir.
   457	// The parent of dir must exist; dir must not.
   458	func (v *vcsCmd) create(dir, repo string) error {
   459		for _, cmd := range v.createCmd {
   460			if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil {
   461				return err
   462			}
   463		}
   464		return nil
   465	}
   466	
   467	// download downloads any new changes for the repo in dir.
   468	func (v *vcsCmd) download(dir string) error {
   469		for _, cmd := range v.downloadCmd {
   470			if err := v.run(dir, cmd); err != nil {
   471				return err
   472			}
   473		}
   474		return nil
   475	}
   476	
   477	// tags returns the list of available tags for the repo in dir.
   478	func (v *vcsCmd) tags(dir string) ([]string, error) {
   479		var tags []string
   480		for _, tc := range v.tagCmd {
   481			out, err := v.runOutput(dir, tc.cmd)
   482			if err != nil {
   483				return nil, err
   484			}
   485			re := regexp.MustCompile(`(?m-s)` + tc.pattern)
   486			for _, m := range re.FindAllStringSubmatch(string(out), -1) {
   487				tags = append(tags, m[1])
   488			}
   489		}
   490		return tags, nil
   491	}
   492	
   493	// tagSync syncs the repo in dir to the named tag,
   494	// which either is a tag returned by tags or is v.tagDefault.
   495	func (v *vcsCmd) tagSync(dir, tag string) error {
   496		if v.tagSyncCmd == nil {
   497			return nil
   498		}
   499		if tag != "" {
   500			for _, tc := range v.tagLookupCmd {
   501				out, err := v.runOutput(dir, tc.cmd, "tag", tag)
   502				if err != nil {
   503					return err
   504				}
   505				re := regexp.MustCompile(`(?m-s)` + tc.pattern)
   506				m := re.FindStringSubmatch(string(out))
   507				if len(m) > 1 {
   508					tag = m[1]
   509					break
   510				}
   511			}
   512		}
   513	
   514		if tag == "" && v.tagSyncDefault != nil {
   515			for _, cmd := range v.tagSyncDefault {
   516				if err := v.run(dir, cmd); err != nil {
   517					return err
   518				}
   519			}
   520			return nil
   521		}
   522	
   523		for _, cmd := range v.tagSyncCmd {
   524			if err := v.run(dir, cmd, "tag", tag); err != nil {
   525				return err
   526			}
   527		}
   528		return nil
   529	}
   530	
// A vcsPath describes how to convert an import path into a
// version control system and repository name.
//
// repoRootFromVCSPaths consults an entry only when the import path starts
// with prefix; the named subexpressions of regexp then feed the expansion
// of repo and vcs.
type vcsPath struct {
	prefix string                              // prefix this description applies to
	regexp *lazyregexp.Regexp                  // compiled pattern for import path
	repo   string                              // repository to use (expand with match of re)
	vcs    string                              // version control system to use (expand with match of re)
	check  func(match map[string]string) error // additional checks
	ping   bool                                // ping for scheme to use to download repo
}
   541	
   542	// vcsFromDir inspects dir and its parents to determine the
   543	// version control system and code repository to use.
   544	// On return, root is the import path
   545	// corresponding to the root of the repository.
   546	func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) {
   547		// Clean and double-check that dir is in (a subdirectory of) srcRoot.
   548		dir = filepath.Clean(dir)
   549		srcRoot = filepath.Clean(srcRoot)
   550		if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
   551			return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
   552		}
   553	
   554		var vcsRet *vcsCmd
   555		var rootRet string
   556	
   557		origDir := dir
   558		for len(dir) > len(srcRoot) {
   559			for _, vcs := range vcsList {
   560				if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil {
   561					root := filepath.ToSlash(dir[len(srcRoot)+1:])
   562					// Record first VCS we find, but keep looking,
   563					// to detect mistakes like one kind of VCS inside another.
   564					if vcsRet == nil {
   565						vcsRet = vcs
   566						rootRet = root
   567						continue
   568					}
   569					// Allow .git inside .git, which can arise due to submodules.
   570					if vcsRet == vcs && vcs.cmd == "git" {
   571						continue
   572					}
   573					// Otherwise, we have one VCS inside a different VCS.
   574					return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s",
   575						filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd)
   576				}
   577			}
   578	
   579			// Move to parent.
   580			ndir := filepath.Dir(dir)
   581			if len(ndir) >= len(dir) {
   582				// Shouldn't happen, but just in case, stop.
   583				break
   584			}
   585			dir = ndir
   586		}
   587	
   588		if vcsRet != nil {
   589			return vcsRet, rootRet, nil
   590		}
   591	
   592		return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir)
   593	}
   594	
   595	// checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS
   596	// situation for dir, checking parents up until srcRoot.
   597	func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error {
   598		if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
   599			return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
   600		}
   601	
   602		otherDir := dir
   603		for len(otherDir) > len(srcRoot) {
   604			for _, otherVCS := range vcsList {
   605				if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil {
   606					// Allow expected vcs in original dir.
   607					if otherDir == dir && otherVCS == vcs {
   608						continue
   609					}
   610					// Allow .git inside .git, which can arise due to submodules.
   611					if otherVCS == vcs && vcs.cmd == "git" {
   612						continue
   613					}
   614					// Otherwise, we have one VCS inside a different VCS.
   615					return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd)
   616				}
   617			}
   618			// Move to parent.
   619			newDir := filepath.Dir(otherDir)
   620			if len(newDir) >= len(otherDir) {
   621				// Shouldn't happen, but just in case, stop.
   622				break
   623			}
   624			otherDir = newDir
   625		}
   626	
   627		return nil
   628	}
   629	
// RepoRoot describes the repository root for a tree of source code.
type RepoRoot struct {
	Repo     string // repository URL, including scheme
	Root     string // import path corresponding to root of repo
	IsCustom bool   // defined by served <meta> tags (as opposed to hard-coded pattern)
	VCS      string // vcs type ("mod", "git", ...)

	// vcs is nil when VCS is "mod", since vcsByCmd knows no such command.
	vcs *vcsCmd // internal: vcs command access
}
   639	
   640	func httpPrefix(s string) string {
   641		for _, prefix := range [...]string{"http:", "https:"} {
   642			if strings.HasPrefix(s, prefix) {
   643				return prefix
   644			}
   645		}
   646		return ""
   647	}
   648	
// ModuleMode specifies whether to prefer modules when looking up code sources.
// In this file the value is only passed through to parseMetaGoImports.
type ModuleMode int

const (
	IgnoreMod ModuleMode = iota // presumably: disregard "mod" go-import entries — confirm in parseMetaGoImports
	PreferMod                   // presumably: favor "mod" go-import entries — confirm in parseMetaGoImports
)
   656	
   657	// RepoRootForImportPath analyzes importPath to determine the
   658	// version control system, and code repository to use.
   659	func RepoRootForImportPath(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) {
   660		rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths)
   661		if err == errUnknownSite {
   662			rr, err = repoRootForImportDynamic(importPath, mod, security)
   663			if err != nil {
   664				err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err)
   665			}
   666		}
   667		if err != nil {
   668			rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic)
   669			if err1 == nil {
   670				rr = rr1
   671				err = nil
   672			}
   673		}
   674	
   675		// Should have been taken care of above, but make sure.
   676		if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.Root, "...") {
   677			// Do not allow wildcards in the repo root.
   678			rr = nil
   679			err = fmt.Errorf("cannot expand ... in %q", importPath)
   680		}
   681		return rr, err
   682	}
   683	
// errUnknownSite is returned by repoRootFromVCSPaths when no table entry
// applies to the import path. RepoRootForImportPath compares against it
// with == to decide whether to attempt dynamic lookup.
var errUnknownSite = errors.New("dynamic lookup required to find mapping")
   685	
// repoRootFromVCSPaths attempts to map importPath to a repoRoot
// using the mappings defined in vcsPaths.
// If scheme is non-empty, that scheme is forced.
//
// It returns errUnknownSite when no entry of vcsPaths applies, and a
// descriptive error when an entry with a non-empty prefix applies but
// the import path does not match that entry's pattern.
func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) {
	// A common error is to use https://packagepath because that's what
	// hg and git require. Diagnose this helpfully.
	if prefix := httpPrefix(importPath); prefix != "" {
		// The importPath has been cleaned, so has only one slash. The pattern
		// ignores the slashes; the error message puts them back on the RHS at least.
		return nil, fmt.Errorf("%q not allowed in import path", prefix+"//")
	}
	for _, srv := range vcsPaths {
		if !strings.HasPrefix(importPath, srv.prefix) {
			continue
		}
		m := srv.regexp.FindStringSubmatch(importPath)
		if m == nil {
			// A named prefix that matched means the path was meant for this
			// entry, so a pattern mismatch is a hard error. Entries with an
			// empty prefix are general patterns and are simply skipped.
			if srv.prefix != "" {
				return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath)
			}
			continue
		}

		// Build map of named subexpression matches for expand.
		match := map[string]string{
			"prefix": srv.prefix,
			"import": importPath,
		}
		for i, name := range srv.regexp.SubexpNames() {
			// Only fill in names that are still empty, so "prefix" and
			// "import" above are never overwritten by a capture group.
			if name != "" && match[name] == "" {
				match[name] = m[i]
			}
		}
		// vcs is expanded before repo, so a repo template may refer to {vcs}.
		if srv.vcs != "" {
			match["vcs"] = expand(match, srv.vcs)
		}
		if srv.repo != "" {
			match["repo"] = expand(match, srv.repo)
		}
		if srv.check != nil {
			if err := srv.check(match); err != nil {
				return nil, err
			}
		}
		vcs := vcsByCmd(match["vcs"])
		if vcs == nil {
			return nil, fmt.Errorf("unknown version control system %q", match["vcs"])
		}
		if srv.ping {
			if scheme != "" {
				match["repo"] = scheme + "://" + match["repo"]
			} else {
				// Note that this loop variable shadows the scheme parameter,
				// which is known to be empty in this branch.
				for _, scheme := range vcs.scheme {
					if security == web.SecureOnly && !vcs.isSecureScheme(scheme) {
						continue
					}
					if vcs.pingCmd != "" && vcs.ping(scheme, match["repo"]) == nil {
						match["repo"] = scheme + "://" + match["repo"]
						goto Found
					}
				}
				// No scheme found. Fall back to the first one.
				// NOTE(review): the fallback does not consult security, so in
				// SecureOnly mode it can select a scheme that isSecureScheme
				// rejects (e.g. "git" for vcsGit) — confirm this is intended.
				match["repo"] = vcs.scheme[0] + "://" + match["repo"]
			Found:
			}
		}
		rr := &RepoRoot{
			Repo: match["repo"],
			Root: match["root"],
			VCS:  vcs.cmd,
			vcs:  vcs,
		}
		return rr, nil
	}
	return nil, errUnknownSite
}
   762	
   763	// urlForImportPath returns a partially-populated URL for the given Go import path.
   764	//
   765	// The URL leaves the Scheme field blank so that web.Get will try any scheme
   766	// allowed by the selected security mode.
   767	func urlForImportPath(importPath string) (*urlpkg.URL, error) {
   768		slash := strings.Index(importPath, "/")
   769		if slash < 0 {
   770			slash = len(importPath)
   771		}
   772		host, path := importPath[:slash], importPath[slash:]
   773		if !strings.Contains(host, ".") {
   774			return nil, errors.New("import path does not begin with hostname")
   775		}
   776		if len(path) == 0 {
   777			path = "/"
   778		}
   779		return &urlpkg.URL{Host: host, Path: path, RawQuery: "go-get=1"}, nil
   780	}
   781	
// repoRootForImportDynamic finds a *RepoRoot for a custom domain that's not
// statically known by repoRootFromVCSPaths.
//
// This handles custom import paths like "name.tld/pkg/foo" or just "name.tld".
//
// It fetches the page for importPath, parses its go-import meta tags and
// picks the one matching importPath. When that tag's prefix differs from
// importPath, the page for the prefix itself is fetched as well and must
// declare the identical tag.
func repoRootForImportDynamic(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) {
	url, err := urlForImportPath(importPath)
	if err != nil {
		return nil, err
	}
	resp, err := web.Get(security, url)
	if err != nil {
		// Name the protocols in the message: "http/https fetch" in
		// insecure mode, plain "https fetch" otherwise.
		msg := "https fetch: %v"
		if security == web.Insecure {
			msg = "http/" + msg
		}
		return nil, fmt.Errorf(msg, err)
	}
	body := resp.Body
	defer body.Close()
	imports, err := parseMetaGoImports(body, mod)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %v", importPath, err)
	}
	// Find the matched meta import.
	mmi, err := matchGoImport(imports, importPath)
	if err != nil {
		// Any error other than an ImportMismatchError is reported as is;
		// a mismatch is reported as "no go-import meta tags".
		if _, ok := err.(ImportMismatchError); !ok {
			return nil, fmt.Errorf("parse %s: %v", url, err)
		}
		return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", resp.URL, err)
	}
	if cfg.BuildV {
		log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, url)
	}
	// If the import was "uni.edu/bob/project", which said the
	// prefix was "uni.edu" and the RepoRoot was "evilroot.com",
	// make sure we don't trust Bob and check out evilroot.com to
	// "uni.edu" yet (possibly overwriting/preempting another
	// non-evil student). Instead, first verify the root and see
	// if it matches Bob's claim.
	if mmi.Prefix != importPath {
		if cfg.BuildV {
			log.Printf("get %q: verifying non-authoritative meta tag", importPath)
		}
		// This declaration shadows the outer imports; url and err, by
		// contrast, are assigned (not redeclared) on the next line.
		var imports []metaImport
		url, imports, err = metaImportsForPrefix(mmi.Prefix, mod, security)
		if err != nil {
			return nil, err
		}
		metaImport2, err := matchGoImport(imports, importPath)
		if err != nil || mmi != metaImport2 {
			return nil, fmt.Errorf("%s and %s disagree about go-import for %s", resp.URL, url, mmi.Prefix)
		}
	}

	if err := validateRepoRoot(mmi.RepoRoot); err != nil {
		return nil, fmt.Errorf("%s: invalid repo root %q: %v", resp.URL, mmi.RepoRoot, err)
	}
	// "mod" is accepted even though vcsByCmd has no entry for it; in that
	// case the RepoRoot is returned with a nil vcs field.
	vcs := vcsByCmd(mmi.VCS)
	if vcs == nil && mmi.VCS != "mod" {
		return nil, fmt.Errorf("%s: unknown vcs %q", resp.URL, mmi.VCS)
	}

	rr := &RepoRoot{
		Repo:     mmi.RepoRoot,
		Root:     mmi.Prefix,
		IsCustom: true,
		VCS:      mmi.VCS,
		vcs:      vcs,
	}
	return rr, nil
}
   854	
   855	// validateRepoRoot returns an error if repoRoot does not seem to be
   856	// a valid URL with scheme.
   857	func validateRepoRoot(repoRoot string) error {
   858		url, err := urlpkg.Parse(repoRoot)
   859		if err != nil {
   860			return err
   861		}
   862		if url.Scheme == "" {
   863			return errors.New("no scheme")
   864		}
   865		if url.Scheme == "file" {
   866			return errors.New("file scheme disallowed")
   867		}
   868		return nil
   869	}
   870	
// fetchGroup is used by metaImportsForPrefix to funnel calls for the same
// import prefix through a single singleflight key.
var fetchGroup singleflight.Group

// fetchCache remembers the result, including a failed one, of every
// metaImportsForPrefix lookup. fetchCacheMu guards it.
var (
	fetchCacheMu sync.Mutex
	fetchCache   = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix
)
   876	
// metaImportsForPrefix takes a package's root import path as declared in a <meta> tag
// and returns its HTML discovery URL and the parsed metaImport lines
// found on the page.
//
// The importPrefix is of the form "golang.org/x/tools".
// It is an error if no imports are found.
// url is still set when err != nil, except when importPrefix itself cannot
// be turned into a URL by urlForImportPath; in that case url is nil.
// The returned url is the one built by urlForImportPath, e.g. host
// "golang.org", path "/x/tools" and query "go-get=1".
//
// Results, failures included, are cached in fetchCache under importPrefix.
// NOTE(review): the cache key does not include mod or security, so a cached
// result is reused for later calls that pass different values — confirm
// that callers never vary them within one process.
func metaImportsForPrefix(importPrefix string, mod ModuleMode, security web.SecurityMode) (*urlpkg.URL, []metaImport, error) {
	// setCache stores res in fetchCache and hands it back with a nil error,
	// so it can be used directly as the closure's return value below. The
	// lookup's own error travels inside res.err.
	setCache := func(res fetchResult) (fetchResult, error) {
		fetchCacheMu.Lock()
		defer fetchCacheMu.Unlock()
		fetchCache[importPrefix] = res
		return res, nil
	}

	// The error result of Do is ignored: every return path of the closure
	// yields a nil error.
	resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) {
		fetchCacheMu.Lock()
		if res, ok := fetchCache[importPrefix]; ok {
			fetchCacheMu.Unlock()
			return res, nil
		}
		// Release the lock before fetching so it is not held during web.Get.
		fetchCacheMu.Unlock()

		url, err := urlForImportPath(importPrefix)
		if err != nil {
			return setCache(fetchResult{err: err})
		}
		resp, err := web.Get(security, url)
		if err != nil {
			return setCache(fetchResult{url: url, err: fmt.Errorf("fetching %s: %v", importPrefix, err)})
		}
		body := resp.Body
		defer body.Close()
		imports, err := parseMetaGoImports(body, mod)
		if err != nil {
			return setCache(fetchResult{url: url, err: fmt.Errorf("parsing %s: %v", resp.URL, err)})
		}
		if len(imports) == 0 {
			err = fmt.Errorf("fetching %s: no go-import meta tag found in %s", importPrefix, resp.URL)
		}
		return setCache(fetchResult{url: url, imports: imports, err: err})
	})
	res := resi.(fetchResult)
	return res.url, res.imports, res.err
}
   923	
// fetchResult is the outcome of one discovery fetch performed by
// metaImportsForPrefix, as stored in fetchCache.
type fetchResult struct {
	url     *urlpkg.URL  // discovery URL; nil if it could not be constructed
	imports []metaImport // go-import meta tags parsed from the page
	err     error        // non-nil if the fetch or parse failed, or no tags were found
}
   929	
   930	// metaImport represents the parsed <meta name="go-import"
   931	// content="prefix vcs reporoot" /> tags from HTML files.
   932	type metaImport struct {
   933		Prefix, VCS, RepoRoot string
   934	}
   935	
   936	// pathPrefix reports whether sub is a prefix of s,
   937	// only considering entire path components.
   938	func pathPrefix(s, sub string) bool {
   939		// strings.HasPrefix is necessary but not sufficient.
   940		if !strings.HasPrefix(s, sub) {
   941			return false
   942		}
   943		// The remainder after the prefix must either be empty or start with a slash.
   944		rem := s[len(sub):]
   945		return rem == "" || rem[0] == '/'
   946	}
   947	
   948	// A ImportMismatchError is returned where metaImport/s are present
   949	// but none match our import path.
   950	type ImportMismatchError struct {
   951		importPath string
   952		mismatches []string // the meta imports that were discarded for not matching our importPath
   953	}
   954	
   955	func (m ImportMismatchError) Error() string {
   956		formattedStrings := make([]string, len(m.mismatches))
   957		for i, pre := range m.mismatches {
   958			formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath)
   959		}
   960		return strings.Join(formattedStrings, ", ")
   961	}
   962	
   963	// matchGoImport returns the metaImport from imports matching importPath.
   964	// An error is returned if there are multiple matches.
   965	// errNoMatch is returned if none match.
   966	func matchGoImport(imports []metaImport, importPath string) (metaImport, error) {
   967		match := -1
   968	
   969		errImportMismatch := ImportMismatchError{importPath: importPath}
   970		for i, im := range imports {
   971			if !pathPrefix(importPath, im.Prefix) {
   972				errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix)
   973				continue
   974			}
   975	
   976			if match >= 0 {
   977				if imports[match].VCS == "mod" && im.VCS != "mod" {
   978					// All the mod entries precede all the non-mod entries.
   979					// We have a mod entry and don't care about the rest,
   980					// matching or not.
   981					break
   982				}
   983				return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath)
   984			}
   985			match = i
   986		}
   987	
   988		if match == -1 {
   989			return metaImport{}, errImportMismatch
   990		}
   991		return imports[match], nil
   992	}
   993	
   994	// expand rewrites s to replace {k} with match[k] for each key k in match.
   995	func expand(match map[string]string, s string) string {
   996		// We want to replace each match exactly once, and the result of expansion
   997		// must not depend on the iteration order through the map.
   998		// A strings.Replacer has exactly the properties we're looking for.
   999		oldNew := make([]string, 0, 2*len(match))
  1000		for k, v := range match {
  1001			oldNew = append(oldNew, "{"+k+"}", v)
  1002		}
  1003		return strings.NewReplacer(oldNew...).Replace(s)
  1004	}
  1005	
  1006	// vcsPaths defines the meaning of import paths referring to
  1007	// commonly-used VCS hosting sites (github.com/user/dir)
  1008	// and import paths referring to a fully-qualified importPath
  1009	// containing a VCS type (foo.com/repo.git/dir)
  1010	var vcsPaths = []*vcsPath{
  1011		// Github
  1012		{
  1013			prefix: "github.com/",
  1014			regexp: lazyregexp.New(`^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`),
  1015			vcs:    "git",
  1016			repo:   "https://{root}",
  1017			check:  noVCSSuffix,
  1018		},
  1019	
  1020		// Bitbucket
  1021		{
  1022			prefix: "bitbucket.org/",
  1023			regexp: lazyregexp.New(`^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`),
  1024			repo:   "https://{root}",
  1025			check:  bitbucketVCS,
  1026		},
  1027	
  1028		// IBM DevOps Services (JazzHub)
  1029		{
  1030			prefix: "hub.jazz.net/git/",
  1031			regexp: lazyregexp.New(`^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`),
  1032			vcs:    "git",
  1033			repo:   "https://{root}",
  1034			check:  noVCSSuffix,
  1035		},
  1036	
  1037		// Git at Apache
  1038		{
  1039			prefix: "git.apache.org/",
  1040			regexp: lazyregexp.New(`^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`),
  1041			vcs:    "git",
  1042			repo:   "https://{root}",
  1043		},
  1044	
  1045		// Git at OpenStack
  1046		{
  1047			prefix: "git.openstack.org/",
  1048			regexp: lazyregexp.New(`^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`),
  1049			vcs:    "git",
  1050			repo:   "https://{root}",
  1051		},
  1052	
  1053		// chiselapp.com for fossil
  1054		{
  1055			prefix: "chiselapp.com/",
  1056			regexp: lazyregexp.New(`^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`),
  1057			vcs:    "fossil",
  1058			repo:   "https://{root}",
  1059		},
  1060	
  1061		// General syntax for any server.
  1062		// Must be last.
  1063		{
  1064			regexp: lazyregexp.New(`(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`),
  1065			ping:   true,
  1066		},
  1067	}
  1068	
// vcsPathsAfterDynamic gives additional vcsPaths entries
// to try after the dynamic HTML check.
// This gives those sites a chance to introduce <meta> tags
// as part of a graceful transition away from the hard-coded logic.
var vcsPathsAfterDynamic = []*vcsPath{
	// Launchpad. See golang.org/issue/11436.
	//
	// The root is either launchpad.net/project, optionally followed by one
	// more path element captured as "series", or a personal branch of the
	// form launchpad.net/~user/project/branch, where "+junk" is accepted in
	// place of the project name.
	// launchpadVCS then decides whether a captured "series" element really
	// belongs to the root.
	{
		prefix: "launchpad.net/",
		regexp: lazyregexp.New(`^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`),
		vcs:    "bzr",
		repo:   "https://{root}",
		check:  launchpadVCS,
	},
}
  1083	
  1084	// noVCSSuffix checks that the repository name does not
  1085	// end in .foo for any version control system foo.
  1086	// The usual culprit is ".git".
  1087	func noVCSSuffix(match map[string]string) error {
  1088		repo := match["repo"]
  1089		for _, vcs := range vcsList {
  1090			if strings.HasSuffix(repo, "."+vcs.cmd) {
  1091				return fmt.Errorf("invalid version control suffix in %s path", match["prefix"])
  1092			}
  1093		}
  1094		return nil
  1095	}
  1096	
  1097	// bitbucketVCS determines the version control system for a
  1098	// Bitbucket repository, by using the Bitbucket API.
  1099	func bitbucketVCS(match map[string]string) error {
  1100		if err := noVCSSuffix(match); err != nil {
  1101			return err
  1102		}
  1103	
  1104		var resp struct {
  1105			SCM string `json:"scm"`
  1106		}
  1107		url := &urlpkg.URL{
  1108			Scheme:   "https",
  1109			Host:     "api.bitbucket.org",
  1110			Path:     expand(match, "/2.0/repositories/{bitname}"),
  1111			RawQuery: "fields=scm",
  1112		}
  1113		data, err := web.GetBytes(url)
  1114		if err != nil {
  1115			if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 {
  1116				// this may be a private repository. If so, attempt to determine which
  1117				// VCS it uses. See issue 5375.
  1118				root := match["root"]
  1119				for _, vcs := range []string{"git", "hg"} {
  1120					if vcsByCmd(vcs).ping("https", root) == nil {
  1121						resp.SCM = vcs
  1122						break
  1123					}
  1124				}
  1125			}
  1126	
  1127			if resp.SCM == "" {
  1128				return err
  1129			}
  1130		} else {
  1131			if err := json.Unmarshal(data, &resp); err != nil {
  1132				return fmt.Errorf("decoding %s: %v", url, err)
  1133			}
  1134		}
  1135	
  1136		if vcsByCmd(resp.SCM) != nil {
  1137			match["vcs"] = resp.SCM
  1138			if resp.SCM == "git" {
  1139				match["repo"] += ".git"
  1140			}
  1141			return nil
  1142		}
  1143	
  1144		return fmt.Errorf("unable to detect version control system for bitbucket.org/ path")
  1145	}
  1146	
  1147	// launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case,
  1148	// "foo" could be a series name registered in Launchpad with its own branch,
  1149	// and it could also be the name of a directory within the main project
  1150	// branch one level up.
  1151	func launchpadVCS(match map[string]string) error {
  1152		if match["project"] == "" || match["series"] == "" {
  1153			return nil
  1154		}
  1155		url := &urlpkg.URL{
  1156			Scheme: "https",
  1157			Host:   "code.launchpad.net",
  1158			Path:   expand(match, "/{project}{series}/.bzr/branch-format"),
  1159		}
  1160		_, err := web.GetBytes(url)
  1161		if err != nil {
  1162			match["root"] = expand(match, "launchpad.net/{project}")
  1163			match["repo"] = expand(match, "https://{root}")
  1164		}
  1165		return nil
  1166	}
  1167	

View as plain text