...

Source file src/pkg/cmd/go/internal/cache/cache.go

     1	// Copyright 2017 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package cache implements a build artifact cache.
     6	package cache
     7	
     8	import (
     9		"bytes"
    10		"crypto/sha256"
    11		"encoding/hex"
    12		"errors"
    13		"fmt"
    14		"io"
    15		"io/ioutil"
    16		"os"
    17		"path/filepath"
    18		"strconv"
    19		"strings"
    20		"time"
    21	
    22		"cmd/go/internal/renameio"
    23	)
    24	
// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
type ActionID [HashSize]byte

// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
	// dir is the root directory of the cache. Entry files live in its
	// 256 two-hex-digit subdirectories ("00" through "ff").
	dir string
	// now is the clock used for mtime updates and trimming.
	// Open sets it to time.Now.
	now func() time.Time
}
    38	
    39	// Open opens and returns the cache in the given directory.
    40	//
    41	// It is safe for multiple processes on a single machine to use the
    42	// same cache directory in a local file system simultaneously.
    43	// They will coordinate using operating system file locks and may
    44	// duplicate effort but will not corrupt the cache.
    45	//
    46	// However, it is NOT safe for multiple processes on different machines
    47	// to share a cache directory (for example, if the directory were stored
    48	// in a network file system). File locking is notoriously unreliable in
    49	// network file systems and may not suffice to protect the cache.
    50	//
    51	func Open(dir string) (*Cache, error) {
    52		info, err := os.Stat(dir)
    53		if err != nil {
    54			return nil, err
    55		}
    56		if !info.IsDir() {
    57			return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
    58		}
    59		for i := 0; i < 256; i++ {
    60			name := filepath.Join(dir, fmt.Sprintf("%02x", i))
    61			if err := os.MkdirAll(name, 0777); err != nil {
    62				return nil, err
    63			}
    64		}
    65		c := &Cache{
    66			dir: dir,
    67			now: time.Now,
    68		}
    69		return c, nil
    70	}
    71	
    72	// fileName returns the name of the file corresponding to the given id.
    73	func (c *Cache) fileName(id [HashSize]byte, key string) string {
    74		return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
    75	}
    76	
// errMissing is the error returned by the lookup methods in this file
// (Get, get, GetFile, GetBytes) when an action entry or its output file
// is absent or fails validation.
var errMissing = errors.New("cache entry not found")

const (
	// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
	//
	// hexSize is the length of one hash written as hexadecimal text.
	// entrySize is the exact byte length of an action entry file: the
	// 2-byte "v1" tag, the two hex hashes, the two 20-byte numeric fields,
	// the four single-space separators, and the trailing newline.
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
)
    84	
// verify controls whether to run the cache in verify mode.
// In verify mode, the cache always returns errMissing from Get
// but then double-checks in Put that the data being written
// exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1. It is assigned by initEnv.
var verify = false

// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
// It is assigned by initEnv and is not read anywhere in this file.
var DebugTest = false
    98	
    99	func init() { initEnv() }
   100	
   101	func initEnv() {
   102		verify = false
   103		debugHash = false
   104		debug := strings.Split(os.Getenv("GODEBUG"), ",")
   105		for _, f := range debug {
   106			if f == "gocacheverify=1" {
   107				verify = true
   108			}
   109			if f == "gocachehash=1" {
   110				debugHash = true
   111			}
   112			if f == "gocachetest=1" {
   113				DebugTest = true
   114			}
   115		}
   116	}
   117	
   118	// Get looks up the action ID in the cache,
   119	// returning the corresponding output ID and file size, if any.
   120	// Note that finding an output ID does not guarantee that the
   121	// saved file for that output ID is still available.
   122	func (c *Cache) Get(id ActionID) (Entry, error) {
   123		if verify {
   124			return Entry{}, errMissing
   125		}
   126		return c.get(id)
   127	}
   128	
// An Entry is the decoded content of an action entry file,
// as returned by Get.
type Entry struct {
	// OutputID is the output hash recorded for the action.
	OutputID OutputID
	// Size is the recorded size of the output, in bytes.
	Size     int64
	// Time is the timestamp recorded in the entry, stored in the file
	// as nanoseconds since the Unix epoch.
	Time     time.Time
}
   134	
   135	// get is Get but does not respect verify mode, so that Put can use it.
   136	func (c *Cache) get(id ActionID) (Entry, error) {
   137		missing := func() (Entry, error) {
   138			return Entry{}, errMissing
   139		}
   140		f, err := os.Open(c.fileName(id, "a"))
   141		if err != nil {
   142			return missing()
   143		}
   144		defer f.Close()
   145		entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
   146		if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
   147			return missing()
   148		}
   149		if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
   150			return missing()
   151		}
   152		eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
   153		eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
   154		esize, entry := entry[1:1+20], entry[1+20:]
   155		etime, entry := entry[1:1+20], entry[1+20:]
   156		var buf [HashSize]byte
   157		if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
   158			return missing()
   159		}
   160		if _, err := hex.Decode(buf[:], eout); err != nil {
   161			return missing()
   162		}
   163		i := 0
   164		for i < len(esize) && esize[i] == ' ' {
   165			i++
   166		}
   167		size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
   168		if err != nil || size < 0 {
   169			return missing()
   170		}
   171		i = 0
   172		for i < len(etime) && etime[i] == ' ' {
   173			i++
   174		}
   175		tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
   176		if err != nil || tm < 0 {
   177			return missing()
   178		}
   179	
   180		c.used(c.fileName(id, "a"))
   181	
   182		return Entry{buf, size, time.Unix(0, tm)}, nil
   183	}
   184	
   185	// GetFile looks up the action ID in the cache and returns
   186	// the name of the corresponding data file.
   187	func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
   188		entry, err = c.Get(id)
   189		if err != nil {
   190			return "", Entry{}, err
   191		}
   192		file = c.OutputFile(entry.OutputID)
   193		info, err := os.Stat(file)
   194		if err != nil || info.Size() != entry.Size {
   195			return "", Entry{}, errMissing
   196		}
   197		return file, entry, nil
   198	}
   199	
   200	// GetBytes looks up the action ID in the cache and returns
   201	// the corresponding output bytes.
   202	// GetBytes should only be used for data that can be expected to fit in memory.
   203	func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
   204		entry, err := c.Get(id)
   205		if err != nil {
   206			return nil, entry, err
   207		}
   208		data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
   209		if sha256.Sum256(data) != entry.OutputID {
   210			return nil, entry, errMissing
   211		}
   212		return data, entry, nil
   213	}
   214	
// OutputFile returns the name of the cache file storing output with the given OutputID.
// As a side effect it marks that file as used (see used), whether or not
// the file actually exists; the returned name is not checked for existence.
func (c *Cache) OutputFile(out OutputID) string {
	file := c.fileName(out, "d")
	c.used(file)
	return file
}
   221	
   222	// Time constants for cache expiration.
   223	//
   224	// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
   225	// to avoid causing many unnecessary inode updates. The mtimes therefore
   226	// roughly reflect "time of last use" but may in fact be older by at most an hour.
   227	//
   228	// We scan the cache for entries to delete at most once per trimInterval (1 day).
   229	//
   230	// When we do scan the cache, we delete entries that have not been used for
   231	// at least trimLimit (5 days). Statistics gathered from a month of usage by
   232	// Go developers found that essentially all reuse of cached entries happened
   233	// within 5 days of the previous reuse. See golang.org/issue/22990.
   234	const (
   235		mtimeInterval = 1 * time.Hour
   236		trimInterval  = 24 * time.Hour
   237		trimLimit     = 5 * 24 * time.Hour
   238	)
   239	
   240	// used makes a best-effort attempt to update mtime on file,
   241	// so that mtime reflects cache access time.
   242	//
   243	// Because the reflection only needs to be approximate,
   244	// and to reduce the amount of disk activity caused by using
   245	// cache entries, used only updates the mtime if the current
   246	// mtime is more than an hour old. This heuristic eliminates
   247	// nearly all of the mtime updates that would otherwise happen,
   248	// while still keeping the mtimes useful for cache trimming.
   249	func (c *Cache) used(file string) {
   250		info, err := os.Stat(file)
   251		if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
   252			return
   253		}
   254		os.Chtimes(file, c.now(), c.now())
   255	}
   256	
   257	// Trim removes old cache entries that are likely not to be reused.
   258	func (c *Cache) Trim() {
   259		now := c.now()
   260	
   261		// We maintain in dir/trim.txt the time of the last completed cache trim.
   262		// If the cache has been trimmed recently enough, do nothing.
   263		// This is the common case.
   264		data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt"))
   265		t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
   266		if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
   267			return
   268		}
   269	
   270		// Trim each of the 256 subdirectories.
   271		// We subtract an additional mtimeInterval
   272		// to account for the imprecision of our "last used" mtimes.
   273		cutoff := now.Add(-trimLimit - mtimeInterval)
   274		for i := 0; i < 256; i++ {
   275			subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
   276			c.trimSubdir(subdir, cutoff)
   277		}
   278	
   279		// Ignore errors from here: if we don't write the complete timestamp, the
   280		// cache will appear older than it is, and we'll trim it again next time.
   281		renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
   282	}
   283	
   284	// trimSubdir trims a single cache subdirectory.
   285	func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
   286		// Read all directory entries from subdir before removing
   287		// any files, in case removing files invalidates the file offset
   288		// in the directory scan. Also, ignore error from f.Readdirnames,
   289		// because we don't care about reporting the error and we still
   290		// want to process any entries found before the error.
   291		f, err := os.Open(subdir)
   292		if err != nil {
   293			return
   294		}
   295		names, _ := f.Readdirnames(-1)
   296		f.Close()
   297	
   298		for _, name := range names {
   299			// Remove only cache entries (xxxx-a and xxxx-d).
   300			if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
   301				continue
   302			}
   303			entry := filepath.Join(subdir, name)
   304			info, err := os.Stat(entry)
   305			if err == nil && info.ModTime().Before(cutoff) {
   306				os.Remove(entry)
   307			}
   308		}
   309	}
   310	
   311	// putIndexEntry adds an entry to the cache recording that executing the action
   312	// with the given id produces an output with the given output id (hash) and size.
   313	func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
   314		// Note: We expect that for one reason or another it may happen
   315		// that repeating an action produces a different output hash
   316		// (for example, if the output contains a time stamp or temp dir name).
   317		// While not ideal, this is also not a correctness problem, so we
   318		// don't make a big deal about it. In particular, we leave the action
   319		// cache entries writable specifically so that they can be overwritten.
   320		//
   321		// Setting GODEBUG=gocacheverify=1 does make a big deal:
   322		// in verify mode we are double-checking that the cache entries
   323		// are entirely reproducible. As just noted, this may be unrealistic
   324		// in some cases but the check is also useful for shaking out real bugs.
   325		entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
   326		if verify && allowVerify {
   327			old, err := c.get(id)
   328			if err == nil && (old.OutputID != out || old.Size != size) {
   329				// panic to show stack trace, so we can see what code is generating this cache entry.
   330				msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
   331				panic(msg)
   332			}
   333		}
   334		file := c.fileName(id, "a")
   335	
   336		// Copy file to cache directory.
   337		mode := os.O_WRONLY | os.O_CREATE
   338		f, err := os.OpenFile(file, mode, 0666)
   339		if err != nil {
   340			return err
   341		}
   342		_, err = f.WriteString(entry)
   343		if err == nil {
   344			// Truncate the file only *after* writing it.
   345			// (This should be a no-op, but truncate just in case of previous corruption.)
   346			//
   347			// This differs from ioutil.WriteFile, which truncates to 0 *before* writing
   348			// via os.O_TRUNC. Truncating only after writing ensures that a second write
   349			// of the same content to the same file is idempotent, and does not — even
   350			// temporarily! — undo the effect of the first write.
   351			err = f.Truncate(int64(len(entry)))
   352		}
   353		if closeErr := f.Close(); err == nil {
   354			err = closeErr
   355		}
   356		if err != nil {
   357			// TODO(bcmills): This Remove potentially races with another go command writing to file.
   358			// Can we eliminate it?
   359			os.Remove(file)
   360			return err
   361		}
   362		os.Chtimes(file, c.now(), c.now()) // mainly for tests
   363	
   364		return nil
   365	}
   366	
// Put stores the given output in the cache as the output for the action ID.
// It may read file twice. The content of file must not change between the two passes.
// It returns the output ID (content hash) and size of the stored data.
// The verify-mode consistency check is enabled for entries written by Put.
func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.put(id, file, true)
}
   372	
// PutNoVerify is like Put but disables the verify check
// when GODEBUG=gocacheverify=1 is set.
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
// like test output containing times and the like.
func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.put(id, file, false)
}
   380	
   381	func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
   382		// Compute output ID.
   383		h := sha256.New()
   384		if _, err := file.Seek(0, 0); err != nil {
   385			return OutputID{}, 0, err
   386		}
   387		size, err := io.Copy(h, file)
   388		if err != nil {
   389			return OutputID{}, 0, err
   390		}
   391		var out OutputID
   392		h.Sum(out[:0])
   393	
   394		// Copy to cached output file (if not already present).
   395		if err := c.copyFile(file, out, size); err != nil {
   396			return out, size, err
   397		}
   398	
   399		// Add to cache index.
   400		return out, size, c.putIndexEntry(id, out, size, allowVerify)
   401	}
   402	
// PutBytes stores the given bytes in the cache as the output for the action ID.
// It is a convenience wrapper around Put that discards the returned
// output ID and size and reports only the error.
func (c *Cache) PutBytes(id ActionID, data []byte) error {
	_, _, err := c.Put(id, bytes.NewReader(data))
	return err
}
   408	
   409	// copyFile copies file into the cache, expecting it to have the given
   410	// output ID and size, if that file is not present already.
   411	func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
   412		name := c.fileName(out, "d")
   413		info, err := os.Stat(name)
   414		if err == nil && info.Size() == size {
   415			// Check hash.
   416			if f, err := os.Open(name); err == nil {
   417				h := sha256.New()
   418				io.Copy(h, f)
   419				f.Close()
   420				var out2 OutputID
   421				h.Sum(out2[:0])
   422				if out == out2 {
   423					return nil
   424				}
   425			}
   426			// Hash did not match. Fall through and rewrite file.
   427		}
   428	
   429		// Copy file to cache directory.
   430		mode := os.O_RDWR | os.O_CREATE
   431		if err == nil && info.Size() > size { // shouldn't happen but fix in case
   432			mode |= os.O_TRUNC
   433		}
   434		f, err := os.OpenFile(name, mode, 0666)
   435		if err != nil {
   436			return err
   437		}
   438		defer f.Close()
   439		if size == 0 {
   440			// File now exists with correct size.
   441			// Only one possible zero-length file, so contents are OK too.
   442			// Early return here makes sure there's a "last byte" for code below.
   443			return nil
   444		}
   445	
   446		// From here on, if any of the I/O writing the file fails,
   447		// we make a best-effort attempt to truncate the file f
   448		// before returning, to avoid leaving bad bytes in the file.
   449	
   450		// Copy file to f, but also into h to double-check hash.
   451		if _, err := file.Seek(0, 0); err != nil {
   452			f.Truncate(0)
   453			return err
   454		}
   455		h := sha256.New()
   456		w := io.MultiWriter(f, h)
   457		if _, err := io.CopyN(w, file, size-1); err != nil {
   458			f.Truncate(0)
   459			return err
   460		}
   461		// Check last byte before writing it; writing it will make the size match
   462		// what other processes expect to find and might cause them to start
   463		// using the file.
   464		buf := make([]byte, 1)
   465		if _, err := file.Read(buf); err != nil {
   466			f.Truncate(0)
   467			return err
   468		}
   469		h.Write(buf)
   470		sum := h.Sum(nil)
   471		if !bytes.Equal(sum, out[:]) {
   472			f.Truncate(0)
   473			return fmt.Errorf("file content changed underfoot")
   474		}
   475	
   476		// Commit cache file entry.
   477		if _, err := f.Write(buf); err != nil {
   478			f.Truncate(0)
   479			return err
   480		}
   481		if err := f.Close(); err != nil {
   482			// Data might not have been written,
   483			// but file may look like it is the right size.
   484			// To be extra careful, remove cached file.
   485			os.Remove(name)
   486			return err
   487		}
   488		os.Chtimes(name, c.now(), c.now()) // mainly for tests
   489	
   490		return nil
   491	}
   492	

View as plain text