...

Source file src/pkg/cmd/go/internal/sumweb/client.go

     1	// Copyright 2019 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package sumweb
     6	
     7	import (
     8		"bytes"
     9		"errors"
    10		"fmt"
    11		"strings"
    12		"sync"
    13		"sync/atomic"
    14	
    15		"cmd/go/internal/note"
    16		"cmd/go/internal/str"
    17		"cmd/go/internal/tlog"
    18	)
    19	
    20	// A Client provides the external operations
    21	// (file caching, HTTP fetches, and so on)
    22	// needed to implement the HTTP client Conn.
    23	// The methods must be safe for concurrent use by multiple goroutines.
    24	type Client interface {
    25		// ReadRemote reads and returns the content served at the given path
    26		// on the remote database server. The path begins with "/lookup" or "/tile/".
    27		// It is the implementation's responsibility to turn that path into a full URL
    28		// and make the HTTP request. ReadRemote should return an error for
    29		// any non-200 HTTP response status.
    30		ReadRemote(path string) ([]byte, error)
    31	
    32		// ReadConfig reads and returns the content of the named configuration file.
    33		// There are only a fixed set of configuration files.
    34		//
    35		// "key" returns a file containing the verifier key for the server.
    36		//
    37		// serverName + "/latest" returns a file containing the latest known
    38		// signed tree from the server. It is read and written (using WriteConfig).
    39		// To signal that the client wishes to start with an "empty" signed tree,
    40		// ReadConfig can return a successful empty result (0 bytes of data).
    41		ReadConfig(file string) ([]byte, error)
    42	
    43		// WriteConfig updates the content of the named configuration file,
    44		// changing it from the old []byte to the new []byte.
    45		// If the old []byte does not match the stored configuration,
    46		// WriteConfig must return ErrWriteConflict.
    47		// Otherwise, WriteConfig should atomically replace old with new.
    48		WriteConfig(file string, old, new []byte) error
    49	
    50		// ReadCache reads and returns the content of the named cache file.
    51		// Any returned error will be treated as equivalent to the file not existing.
    52		// There can be arbitrarily many cache files, such as:
    53		//	serverName/lookup/pkg@version
    54		//	serverName/tile/8/1/x123/456
    55		ReadCache(file string) ([]byte, error)
    56	
    57		// WriteCache writes the named cache file.
    58		WriteCache(file string, data []byte)
    59	
    60		// Log prints the given log message (such as with log.Print)
    61		Log(msg string)
    62	
    63		// SecurityError prints the given security error log message.
    64		// The Conn returns ErrSecurity from any operation that invokes SecurityError,
    65		// but the return value is mainly for testing. In a real program,
    66		// SecurityError should typically print the message and call log.Fatal or os.Exit.
    67		SecurityError(msg string)
    68	}
    69	
    70	// ErrWriteConflict signals a write conflict during Client.WriteConfig.
    71	var ErrWriteConflict = errors.New("write conflict")
    72	
    73	// ErrSecurity is returned by Conn operations that invoke Client.SecurityError.
    74	var ErrSecurity = errors.New("security error: misbehaving server")
    75	
// A Conn is a client connection to a go.sum database.
// All the methods are safe for simultaneous use by multiple goroutines.
type Conn struct {
	client Client // client-provided external world

	// didLookup is stored as 1 (atomically) at the start of every Lookup.
	// SetTileHeight and SetGONOSUMDB panic once it is nonzero.
	didLookup uint32

	// one-time initialized data
	initOnce   sync.Once
	initErr    error          // init error, if any
	name       string         // name of accepted verifier
	verifiers  note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open)
	tileReader tileReader
	tileHeight int
	nosumdb    string

	record    parCache // cache of record lookup, keyed by path@vers
	tileCache parCache // cache of c.readTile, keyed by tile

	// latestMu is held around every read and write of latest and latestMsg.
	latestMu  sync.Mutex
	latest    tlog.Tree // latest known tree head
	latestMsg []byte    // encoded signed note for latest

	// tileSavedMu is held around every access to tileSaved.
	tileSavedMu sync.Mutex
	tileSaved   map[tlog.Tile]bool // which tiles have been saved using c.client.WriteCache already
}
   102	
   103	// NewConn returns a new Conn using the given Client.
   104	func NewConn(client Client) *Conn {
   105		return &Conn{
   106			client: client,
   107		}
   108	}
   109	
   110	// init initiailzes the conn (if not already initialized)
   111	// and returns any initialization error.
   112	func (c *Conn) init() error {
   113		c.initOnce.Do(c.initWork)
   114		return c.initErr
   115	}
   116	
   117	// initWork does the actual initialization work.
   118	func (c *Conn) initWork() {
   119		defer func() {
   120			if c.initErr != nil {
   121				c.initErr = fmt.Errorf("initializing sumweb.Conn: %v", c.initErr)
   122			}
   123		}()
   124	
   125		c.tileReader.c = c
   126		if c.tileHeight == 0 {
   127			c.tileHeight = 8
   128		}
   129		c.tileSaved = make(map[tlog.Tile]bool)
   130	
   131		vkey, err := c.client.ReadConfig("key")
   132		if err != nil {
   133			c.initErr = err
   134			return
   135		}
   136		verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey)))
   137		if err != nil {
   138			c.initErr = err
   139			return
   140		}
   141		c.verifiers = note.VerifierList(verifier)
   142		c.name = verifier.Name()
   143	
   144		data, err := c.client.ReadConfig(c.name + "/latest")
   145		if err != nil {
   146			c.initErr = err
   147			return
   148		}
   149		if err := c.mergeLatest(data); err != nil {
   150			c.initErr = err
   151			return
   152		}
   153	}
   154	
   155	// SetTileHeight sets the tile height for the Conn.
   156	// Any call to SetTileHeight must happen before the first call to Lookup.
   157	// If SetTileHeight is not called, the Conn defaults to tile height 8.
   158	func (c *Conn) SetTileHeight(height int) {
   159		if atomic.LoadUint32(&c.didLookup) != 0 {
   160			panic("SetTileHeight used after Lookup")
   161		}
   162		if c.tileHeight != 0 {
   163			panic("multiple calls to SetTileHeight")
   164		}
   165		c.tileHeight = height
   166	}
   167	
   168	// SetGONOSUMDB sets the list of comma-separated GONOSUMDB patterns for the Conn.
   169	// For any module path matching one of the patterns,
   170	// Lookup will return ErrGONOSUMDB.
   171	// Any call to SetGONOSUMDB must happen before the first call to Lookup.
   172	func (c *Conn) SetGONOSUMDB(list string) {
   173		if atomic.LoadUint32(&c.didLookup) != 0 {
   174			panic("SetGONOSUMDB used after Lookup")
   175		}
   176		if c.nosumdb != "" {
   177			panic("multiple calls to SetGONOSUMDB")
   178		}
   179		c.nosumdb = list
   180	}
   181	
// ErrGONOSUMDB is returned by Lookup for paths that match
// a pattern listed in the GONOSUMDB list (set by SetGONOSUMDB,
// usually from the environment variable).
// Lookup returns it as is, without the path@vers prefix
// that it adds to its other errors.
var ErrGONOSUMDB = errors.New("skipped (listed in GONOSUMDB)")
   186	
   187	func (c *Conn) skip(target string) bool {
   188		return str.GlobsMatchPath(c.nosumdb, target)
   189	}
   190	
   191	// Lookup returns the go.sum lines for the given module path and version.
   192	// The version may end in a /go.mod suffix, in which case Lookup returns
   193	// the go.sum lines for the module's go.mod-only hash.
   194	func (c *Conn) Lookup(path, vers string) (lines []string, err error) {
   195		atomic.StoreUint32(&c.didLookup, 1)
   196	
   197		if c.skip(path) {
   198			return nil, ErrGONOSUMDB
   199		}
   200	
   201		defer func() {
   202			if err != nil {
   203				err = fmt.Errorf("%s@%s: %v", path, vers, err)
   204			}
   205		}()
   206	
   207		if err := c.init(); err != nil {
   208			return nil, err
   209		}
   210	
   211		// Prepare encoded cache filename / URL.
   212		epath, err := encodePath(path)
   213		if err != nil {
   214			return nil, err
   215		}
   216		evers, err := encodeVersion(strings.TrimSuffix(vers, "/go.mod"))
   217		if err != nil {
   218			return nil, err
   219		}
   220		file := c.name + "/lookup/" + epath + "@" + evers
   221		remotePath := "/lookup/" + epath + "@" + evers
   222	
   223		// Fetch the data.
   224		// The lookupCache avoids redundant ReadCache/GetURL operations
   225		// (especially since go.sum lines tend to come in pairs for a given
   226		// path and version) and also avoids having multiple of the same
   227		// request in flight at once.
   228		type cached struct {
   229			data []byte
   230			err  error
   231		}
   232		result := c.record.Do(file, func() interface{} {
   233			// Try the on-disk cache, or else get from web.
   234			writeCache := false
   235			data, err := c.client.ReadCache(file)
   236			if err != nil {
   237				data, err = c.client.ReadRemote(remotePath)
   238				if err != nil {
   239					return cached{nil, err}
   240				}
   241				writeCache = true
   242			}
   243	
   244			// Validate the record before using it for anything.
   245			id, text, treeMsg, err := tlog.ParseRecord(data)
   246			if err != nil {
   247				return cached{nil, err}
   248			}
   249			if err := c.mergeLatest(treeMsg); err != nil {
   250				return cached{nil, err}
   251			}
   252			if err := c.checkRecord(id, text); err != nil {
   253				return cached{nil, err}
   254			}
   255	
   256			// Now that we've validated the record,
   257			// save it to the on-disk cache (unless that's where it came from).
   258			if writeCache {
   259				c.client.WriteCache(file, data)
   260			}
   261	
   262			return cached{data, nil}
   263		}).(cached)
   264		if result.err != nil {
   265			return nil, result.err
   266		}
   267	
   268		// Extract the lines for the specific version we want
   269		// (with or without /go.mod).
   270		prefix := path + " " + vers + " "
   271		var hashes []string
   272		for _, line := range strings.Split(string(result.data), "\n") {
   273			if strings.HasPrefix(line, prefix) {
   274				hashes = append(hashes, line)
   275			}
   276		}
   277		return hashes, nil
   278	}
   279	
   280	// mergeLatest merges the tree head in msg
   281	// with the Conn's current latest tree head,
   282	// ensuring the result is a consistent timeline.
   283	// If the result is inconsistent, mergeLatest calls c.client.SecurityError
   284	// with a detailed security error message and then
   285	// (only if c.client.SecurityError does not exit the program) returns ErrSecurity.
   286	// If the Conn's current latest tree head moves forward,
   287	// mergeLatest updates the underlying configuration file as well,
   288	// taking care to merge any independent updates to that configuration.
   289	func (c *Conn) mergeLatest(msg []byte) error {
   290		// Merge msg into our in-memory copy of the latest tree head.
   291		when, err := c.mergeLatestMem(msg)
   292		if err != nil {
   293			return err
   294		}
   295		if when != msgFuture {
   296			// msg matched our present or was in the past.
   297			// No change to our present, so no update of config file.
   298			return nil
   299		}
   300	
   301		// Flush our extended timeline back out to the configuration file.
   302		// If the configuration file has been updated in the interim,
   303		// we need to merge any updates made there as well.
   304		// Note that writeConfig is an atomic compare-and-swap.
   305		for {
   306			msg, err := c.client.ReadConfig(c.name + "/latest")
   307			if err != nil {
   308				return err
   309			}
   310			when, err := c.mergeLatestMem(msg)
   311			if err != nil {
   312				return err
   313			}
   314			if when != msgPast {
   315				// msg matched our present or was from the future,
   316				// and now our in-memory copy matches.
   317				return nil
   318			}
   319	
   320			// msg (== config) is in the past, so we need to update it.
   321			c.latestMu.Lock()
   322			latestMsg := c.latestMsg
   323			c.latestMu.Unlock()
   324			if err := c.client.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict {
   325				// Success or a non-write-conflict error.
   326				return err
   327			}
   328		}
   329	}
   330	
   331	const (
   332		msgPast = 1 + iota
   333		msgNow
   334		msgFuture
   335	)
   336	
   337	// mergeLatestMem is like mergeLatest but is only concerned with
   338	// updating the in-memory copy of the latest tree head (c.latest)
   339	// not the configuration file.
   340	// The when result explains when msg happened relative to our
   341	// previous idea of c.latest:
   342	// msgPast means msg was from before c.latest,
   343	// msgNow means msg was exactly c.latest, and
   344	// msgFuture means msg was from after c.latest, which has now been updated.
   345	func (c *Conn) mergeLatestMem(msg []byte) (when int, err error) {
   346		if len(msg) == 0 {
   347			// Accept empty msg as the unsigned, empty timeline.
   348			c.latestMu.Lock()
   349			latest := c.latest
   350			c.latestMu.Unlock()
   351			if latest.N == 0 {
   352				return msgNow, nil
   353			}
   354			return msgPast, nil
   355		}
   356	
   357		note, err := note.Open(msg, c.verifiers)
   358		if err != nil {
   359			return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg)
   360		}
   361		tree, err := tlog.ParseTree([]byte(note.Text))
   362		if err != nil {
   363			return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)
   364		}
   365	
   366		// Other lookups may be calling mergeLatest with other heads,
   367		// so c.latest is changing underfoot. We don't want to hold the
   368		// c.mu lock during tile fetches, so loop trying to update c.latest.
   369		c.latestMu.Lock()
   370		latest := c.latest
   371		latestMsg := c.latestMsg
   372		c.latestMu.Unlock()
   373	
   374		for {
   375			// If the tree head looks old, check that it is on our timeline.
   376			if tree.N <= latest.N {
   377				if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil {
   378					return 0, err
   379				}
   380				if tree.N < latest.N {
   381					return msgPast, nil
   382				}
   383				return msgNow, nil
   384			}
   385	
   386			// The tree head looks new. Check that we are on its timeline and try to move our timeline forward.
   387			if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil {
   388				return 0, err
   389			}
   390	
   391			// Install our msg if possible.
   392			// Otherwise we will go around again.
   393			c.latestMu.Lock()
   394			installed := false
   395			if c.latest == latest {
   396				installed = true
   397				c.latest = tree
   398				c.latestMsg = msg
   399			} else {
   400				latest = c.latest
   401				latestMsg = c.latestMsg
   402			}
   403			c.latestMu.Unlock()
   404	
   405			if installed {
   406				return msgFuture, nil
   407			}
   408		}
   409	}
   410	
   411	// checkTrees checks that older (from olderNote) is contained in newer (from newerNote).
   412	// If an error occurs, such as malformed data or a network problem, checkTrees returns that error.
   413	// If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed
   414	// message and calls log.Fatal.
   415	func (c *Conn) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error {
   416		thr := tlog.TileHashReader(newer, &c.tileReader)
   417		h, err := tlog.TreeHash(older.N, thr)
   418		if err != nil {
   419			if older.N == newer.N {
   420				return fmt.Errorf("checking tree#%d: %v", older.N, err)
   421			}
   422			return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err)
   423		}
   424		if h == older.Hash {
   425			return nil
   426		}
   427	
   428		// Detected a fork in the tree timeline.
   429		// Start by reporting the inconsistent signed tree notes.
   430		var buf bytes.Buffer
   431		fmt.Fprintf(&buf, "SECURITY ERROR\n")
   432		fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n")
   433		indent := func(b []byte) []byte {
   434			return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1)
   435		}
   436		fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote))
   437		fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote))
   438	
   439		// The notes alone are not enough to prove the inconsistency.
   440		// We also need to show that the newer note's tree hash for older.N
   441		// does not match older.Hash. The consumer of this report could
   442		// of course consult the server to try to verify the inconsistency,
   443		// but we are holding all the bits we need to prove it right now,
   444		// so we might as well print them and make the report not depend
   445		// on the continued availability of the misbehaving server.
   446		// Preparing this data only reuses the tiled hashes needed for
   447		// tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles,
   448		// there are no new access to the server here, and these operations cannot fail.
   449		fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h)
   450		if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil {
   451			fmt.Fprintf(&buf, "\tinternal error: %v\n", err)
   452		} else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil {
   453			fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n")
   454		} else {
   455			for _, h := range p {
   456				fmt.Fprintf(&buf, "\n\t%v", h)
   457			}
   458		}
   459		c.client.SecurityError(buf.String())
   460		return ErrSecurity
   461	}
   462	
   463	// checkRecord checks that record #id's hash matches data.
   464	func (c *Conn) checkRecord(id int64, data []byte) error {
   465		c.latestMu.Lock()
   466		latest := c.latest
   467		c.latestMu.Unlock()
   468	
   469		if id >= latest.N {
   470			return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N)
   471		}
   472		hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)})
   473		if err != nil {
   474			return err
   475		}
   476		if hashes[0] == tlog.RecordHash(data) {
   477			return nil
   478		}
   479		return fmt.Errorf("cannot authenticate record data in server response")
   480	}
   481	
// tileReader is a *Conn wrapper that implements tlog.TileReader.
// The separate type avoids exposing the ReadTiles and SaveTiles
// methods on Conn itself.
type tileReader struct {
	c *Conn // the Conn served by this reader; pointed back at the owning Conn in initWork
}
   488	
   489	func (r *tileReader) Height() int {
   490		return r.c.tileHeight
   491	}
   492	
   493	// ReadTiles reads and returns the requested tiles,
   494	// either from the on-disk cache or the server.
   495	func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) {
   496		// Read all the tiles in parallel.
   497		data := make([][]byte, len(tiles))
   498		errs := make([]error, len(tiles))
   499		var wg sync.WaitGroup
   500		for i, tile := range tiles {
   501			wg.Add(1)
   502			go func(i int, tile tlog.Tile) {
   503				defer wg.Done()
   504				data[i], errs[i] = r.c.readTile(tile)
   505			}(i, tile)
   506		}
   507		wg.Wait()
   508	
   509		for _, err := range errs {
   510			if err != nil {
   511				return nil, err
   512			}
   513		}
   514	
   515		return data, nil
   516	}
   517	
   518	// tileCacheKey returns the cache key for the tile.
   519	func (c *Conn) tileCacheKey(tile tlog.Tile) string {
   520		return c.name + "/" + tile.Path()
   521	}
   522	
   523	// tileRemotePath returns the remote path for the tile.
   524	func (c *Conn) tileRemotePath(tile tlog.Tile) string {
   525		return "/" + tile.Path()
   526	}
   527	
   528	// readTile reads a single tile, either from the on-disk cache or the server.
   529	func (c *Conn) readTile(tile tlog.Tile) ([]byte, error) {
   530		type cached struct {
   531			data []byte
   532			err  error
   533		}
   534	
   535		result := c.tileCache.Do(tile, func() interface{} {
   536			// Try the requested tile in on-disk cache.
   537			data, err := c.client.ReadCache(c.tileCacheKey(tile))
   538			if err == nil {
   539				c.markTileSaved(tile)
   540				return cached{data, nil}
   541			}
   542	
   543			// Try the full tile in on-disk cache (if requested tile not already full).
   544			// We only save authenticated tiles to the on-disk cache,
   545			// so the recreated prefix is equally authenticated.
   546			full := tile
   547			full.W = 1 << tile.H
   548			if tile != full {
   549				data, err := c.client.ReadCache(c.tileCacheKey(full))
   550				if err == nil {
   551					c.markTileSaved(tile) // don't save tile later; we already have full
   552					return cached{data[:len(data)/full.W*tile.W], nil}
   553				}
   554			}
   555	
   556			// Try requested tile from server.
   557			data, err = c.client.ReadRemote(c.tileRemotePath(tile))
   558			if err == nil {
   559				return cached{data, nil}
   560			}
   561	
   562			// Try full tile on server.
   563			// If the partial tile does not exist, it should be because
   564			// the tile has been completed and only the complete one
   565			// is available.
   566			if tile != full {
   567				data, err := c.client.ReadRemote(c.tileRemotePath(full))
   568				if err == nil {
   569					// Note: We could save the full tile in the on-disk cache here,
   570					// but we don't know if it is valid yet, and we will only find out
   571					// about the partial data, not the full data. So let SaveTiles
   572					// save the partial tile, and we'll just refetch the full tile later
   573					// once we can validate more (or all) of it.
   574					return cached{data[:len(data)/full.W*tile.W], nil}
   575				}
   576			}
   577	
   578			// Nothing worked.
   579			// Return the error from the server fetch for the requested (not full) tile.
   580			return cached{nil, err}
   581		}).(cached)
   582	
   583		return result.data, result.err
   584	}
   585	
   586	// markTileSaved records that tile is already present in the on-disk cache,
   587	// so that a future SaveTiles for that tile can be ignored.
   588	func (c *Conn) markTileSaved(tile tlog.Tile) {
   589		c.tileSavedMu.Lock()
   590		c.tileSaved[tile] = true
   591		c.tileSavedMu.Unlock()
   592	}
   593	
   594	// SaveTiles saves the now validated tiles.
   595	func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) {
   596		c := r.c
   597	
   598		// Determine which tiles need saving.
   599		// (Tiles that came from the cache need not be saved back.)
   600		save := make([]bool, len(tiles))
   601		c.tileSavedMu.Lock()
   602		for i, tile := range tiles {
   603			if !c.tileSaved[tile] {
   604				save[i] = true
   605				c.tileSaved[tile] = true
   606			}
   607		}
   608		c.tileSavedMu.Unlock()
   609	
   610		for i, tile := range tiles {
   611			if save[i] {
   612				// If WriteCache fails here (out of disk space? i/o error?),
   613				// c.tileSaved[tile] is still true and we will not try to write it again.
   614				// Next time we run maybe we'll redownload it again and be
   615				// more successful.
   616				c.client.WriteCache(c.name+"/"+tile.Path(), data[i])
   617			}
   618		}
   619	}
   620	

View as plain text