Source file src/pkg/cmd/go/internal/tlog/tlog.go

     1	// Copyright 2019 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package tlog implements a tamper-evident log
     6	// used in the Go module go.sum database server.
     7	//
     8	// This package is part of a DRAFT of what the go.sum database server will look like.
     9	// Do not assume the details here are final!
    10	//
    11	// This package follows the design of Certificate Transparency (RFC 6962)
    12	// and its proofs are compatible with that system.
    13	// See TestCertificateTransparency.
    14	//
    15	package tlog
    16	
    17	import (
    18		"crypto/sha256"
    19		"encoding/base64"
    20		"errors"
    21		"fmt"
    22		"math/bits"
    23	)
    24	
    25	// A Hash is a hash identifying a log record or tree root.
    26	type Hash [HashSize]byte
    27	
    28	// HashSize is the size of a Hash in bytes.
    29	const HashSize = 32
    30	
    31	// String returns a base64 representation of the hash for printing.
    32	func (h Hash) String() string {
    33		return base64.StdEncoding.EncodeToString(h[:])
    34	}
    35	
    36	// MarshalJSON marshals the hash as a JSON string containing the base64-encoded hash.
    37	func (h Hash) MarshalJSON() ([]byte, error) {
    38		return []byte(`"` + h.String() + `"`), nil
    39	}
    40	
    41	// UnmarshalJSON unmarshals a hash from JSON string containing the a base64-encoded hash.
    42	func (h *Hash) UnmarshalJSON(data []byte) error {
    43		if len(data) != 1+44+1 || data[0] != '"' || data[len(data)-2] != '=' || data[len(data)-1] != '"' {
    44			return errors.New("cannot decode hash")
    45		}
    46	
    47		// As of Go 1.12, base64.StdEncoding.Decode insists on
    48		// slicing into target[33:] even when it only writes 32 bytes.
    49		// Since we already checked that the hash ends in = above,
    50		// we can use base64.RawStdEncoding with the = removed;
    51		// RawStdEncoding does not exhibit the same bug.
    52		// We decode into a temporary to avoid writing anything to *h
    53		// unless the entire input is well-formed.
    54		var tmp Hash
    55		n, err := base64.RawStdEncoding.Decode(tmp[:], data[1:len(data)-2])
    56		if err != nil || n != HashSize {
    57			return errors.New("cannot decode hash")
    58		}
    59		*h = tmp
    60		return nil
    61	}
    62	
    63	// ParseHash parses the base64-encoded string form of a hash.
    64	func ParseHash(s string) (Hash, error) {
    65		data, err := base64.StdEncoding.DecodeString(s)
    66		if err != nil || len(data) != HashSize {
    67			return Hash{}, fmt.Errorf("malformed hash")
    68		}
    69		var h Hash
    70		copy(h[:], data)
    71		return h, nil
    72	}
    73	
    74	// maxpow2 returns k, the maximum power of 2 smaller than n,
    75	// as well as l = log₂ k (so k = 1<<l).
    76	func maxpow2(n int64) (k int64, l int) {
    77		l = 0
    78		for 1<<uint(l+1) < n {
    79			l++
    80		}
    81		return 1 << uint(l), l
    82	}
    83	
    84	var zeroPrefix = []byte{0x00}
    85	
    86	// RecordHash returns the content hash for the given record data.
    87	func RecordHash(data []byte) Hash {
    88		// SHA256(0x00 || data)
    89		// https://tools.ietf.org/html/rfc6962#section-2.1
    90		h := sha256.New()
    91		h.Write(zeroPrefix)
    92		h.Write(data)
    93		var h1 Hash
    94		h.Sum(h1[:0])
    95		return h1
    96	}
    97	
    98	// NodeHash returns the hash for an interior tree node with the given left and right hashes.
    99	func NodeHash(left, right Hash) Hash {
   100		// SHA256(0x01 || left || right)
   101		// https://tools.ietf.org/html/rfc6962#section-2.1
   102		// We use a stack buffer to assemble the hash input
   103		// to avoid allocating a hash struct with sha256.New.
   104		var buf [1 + HashSize + HashSize]byte
   105		buf[0] = 0x01
   106		copy(buf[1:], left[:])
   107		copy(buf[1+HashSize:], right[:])
   108		return sha256.Sum256(buf[:])
   109	}
   110	
   111	// For information about the stored hash index ordering,
   112	// see section 3.3 of Crosby and Wallach's paper
   113	// "Efficient Data Structures for Tamper-Evident Logging".
   114	// https://www.usenix.org/legacy/event/sec09/tech/full_papers/crosby.pdf
   115	
   116	// StoredHashIndex maps the tree coordinates (level, n)
   117	// to a dense linear ordering that can be used for hash storage.
   118	// Hash storage implementations that store hashes in sequential
   119	// storage can use this function to compute where to read or write
   120	// a given hash.
   121	func StoredHashIndex(level int, n int64) int64 {
   122		// Level L's n'th hash is written right after level L+1's 2n+1'th hash.
   123		// Work our way down to the level 0 ordering.
   124		// We'll add back the orignal level count at the end.
   125		for l := level; l > 0; l-- {
   126			n = 2*n + 1
   127		}
   128	
   129		// Level 0's n'th hash is written at n+n/2+n/4+... (eventually n/2ⁱ hits zero).
   130		i := int64(0)
   131		for ; n > 0; n >>= 1 {
   132			i += n
   133		}
   134	
   135		return i + int64(level)
   136	}
   137	
   138	// SplitStoredHashIndex is the inverse of StoredHashIndex.
   139	// That is, SplitStoredHashIndex(StoredHashIndex(level, n)) == level, n.
   140	func SplitStoredHashIndex(index int64) (level int, n int64) {
   141		// Determine level 0 record before index.
   142		// StoredHashIndex(0, n) < 2*n,
   143		// so the n we want is in [index/2, index/2+log₂(index)].
   144		n = index / 2
   145		indexN := StoredHashIndex(0, n)
   146		if indexN > index {
   147			panic("bad math")
   148		}
   149		for {
   150			// Each new record n adds 1 + trailingZeros(n) hashes.
   151			x := indexN + 1 + int64(bits.TrailingZeros64(uint64(n+1)))
   152			if x > index {
   153				break
   154			}
   155			n++
   156			indexN = x
   157		}
   158		// The hash we want was commited with record n,
   159		// meaning it is one of (0, n), (1, n/2), (2, n/4), ...
   160		level = int(index - indexN)
   161		return level, n >> uint(level)
   162	}
   163	
   164	// StoredHashCount returns the number of stored hashes
   165	// that are expected for a tree with n records.
   166	func StoredHashCount(n int64) int64 {
   167		if n == 0 {
   168			return 0
   169		}
   170		// The tree will have the hashes up to the last leaf hash.
   171		numHash := StoredHashIndex(0, n-1) + 1
   172		// And it will have any hashes for subtrees completed by that leaf.
   173		for i := uint64(n - 1); i&1 != 0; i >>= 1 {
   174			numHash++
   175		}
   176		return numHash
   177	}
   178	
   179	// StoredHashes returns the hashes that must be stored when writing
   180	// record n with the given data. The hashes should be stored starting
   181	// at StoredHashIndex(0, n). The result will have at most 1 + log₂ n hashes,
   182	// but it will average just under two per call for a sequence of calls for n=1..k.
   183	//
   184	// StoredHashes may read up to log n earlier hashes from r
   185	// in order to compute hashes for completed subtrees.
   186	func StoredHashes(n int64, data []byte, r HashReader) ([]Hash, error) {
   187		return StoredHashesForRecordHash(n, RecordHash(data), r)
   188	}
   189	
   190	// StoredHashesForRecordHash is like StoredHashes but takes
   191	// as its second argument RecordHash(data) instead of data itself.
   192	func StoredHashesForRecordHash(n int64, h Hash, r HashReader) ([]Hash, error) {
   193		// Start with the record hash.
   194		hashes := []Hash{h}
   195	
   196		// Build list of indexes needed for hashes for completed subtrees.
   197		// Each trailing 1 bit in the binary representation of n completes a subtree
   198		// and consumes a hash from an adjacent subtree.
   199		m := int(bits.TrailingZeros64(uint64(n + 1)))
   200		indexes := make([]int64, m)
   201		for i := 0; i < m; i++ {
   202			// We arrange indexes in sorted order.
   203			// Note that n>>i is always odd.
   204			indexes[m-1-i] = StoredHashIndex(i, n>>uint(i)-1)
   205		}
   206	
   207		// Fetch hashes.
   208		old, err := r.ReadHashes(indexes)
   209		if err != nil {
   210			return nil, err
   211		}
   212		if len(old) != len(indexes) {
   213			return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(old))
   214		}
   215	
   216		// Build new hashes.
   217		for i := 0; i < m; i++ {
   218			h = NodeHash(old[m-1-i], h)
   219			hashes = append(hashes, h)
   220		}
   221		return hashes, nil
   222	}
   223	
   224	// A HashReader can read hashes for nodes in the log's tree structure.
   225	type HashReader interface {
   226		// ReadHashes returns the hashes with the given stored hash indexes
   227		// (see StoredHashIndex and SplitStoredHashIndex).
   228		// ReadHashes must return a slice of hashes the same length as indexes,
   229		// or else it must return a non-nil error.
   230		// ReadHashes may run faster if indexes is sorted in increasing order.
   231		ReadHashes(indexes []int64) ([]Hash, error)
   232	}
   233	
   234	// A HashReaderFunc is a function implementing HashReader.
   235	type HashReaderFunc func([]int64) ([]Hash, error)
   236	
   237	func (f HashReaderFunc) ReadHashes(indexes []int64) ([]Hash, error) {
   238		return f(indexes)
   239	}
   240	
   241	// TreeHash computes the hash for the root of the tree with n records,
   242	// using the HashReader to obtain previously stored hashes
   243	// (those returned by StoredHashes during the writes of those n records).
   244	// TreeHash makes a single call to ReadHash requesting at most 1 + log₂ n hashes.
   245	// The tree of size zero is defined to have an all-zero Hash.
   246	func TreeHash(n int64, r HashReader) (Hash, error) {
   247		if n == 0 {
   248			return Hash{}, nil
   249		}
   250		indexes := subTreeIndex(0, n, nil)
   251		hashes, err := r.ReadHashes(indexes)
   252		if err != nil {
   253			return Hash{}, err
   254		}
   255		if len(hashes) != len(indexes) {
   256			return Hash{}, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
   257		}
   258		hash, hashes := subTreeHash(0, n, hashes)
   259		if len(hashes) != 0 {
   260			panic("tlog: bad index math in TreeHash")
   261		}
   262		return hash, nil
   263	}
   264	
   265	// subTreeIndex returns the storage indexes needed to compute
   266	// the hash for the subtree containing records [lo, hi),
   267	// appending them to need and returning the result.
   268	// See https://tools.ietf.org/html/rfc6962#section-2.1
   269	func subTreeIndex(lo, hi int64, need []int64) []int64 {
   270		// See subTreeHash below for commentary.
   271		for lo < hi {
   272			k, level := maxpow2(hi - lo + 1)
   273			if lo&(k-1) != 0 {
   274				panic("tlog: bad math in subTreeIndex")
   275			}
   276			need = append(need, StoredHashIndex(level, lo>>uint(level)))
   277			lo += k
   278		}
   279		return need
   280	}
   281	
   282	// subTreeHash computes the hash for the subtree containing records [lo, hi),
   283	// assuming that hashes are the hashes corresponding to the indexes
   284	// returned by subTreeIndex(lo, hi).
   285	// It returns any leftover hashes.
   286	func subTreeHash(lo, hi int64, hashes []Hash) (Hash, []Hash) {
   287		// Repeatedly partition the tree into a left side with 2^level nodes,
   288		// for as large a level as possible, and a right side with the fringe.
   289		// The left hash is stored directly and can be read from storage.
   290		// The right side needs further computation.
   291		numTree := 0
   292		for lo < hi {
   293			k, _ := maxpow2(hi - lo + 1)
   294			if lo&(k-1) != 0 || lo >= hi {
   295				panic("tlog: bad math in subTreeHash")
   296			}
   297			numTree++
   298			lo += k
   299		}
   300	
   301		if len(hashes) < numTree {
   302			panic("tlog: bad index math in subTreeHash")
   303		}
   304	
   305		// Reconstruct hash.
   306		h := hashes[numTree-1]
   307		for i := numTree - 2; i >= 0; i-- {
   308			h = NodeHash(hashes[i], h)
   309		}
   310		return h, hashes[numTree:]
   311	}
   312	
   313	// A RecordProof is a verifiable proof that a particular log root contains a particular record.
   314	// RFC 6962 calls this a “Merkle audit path.”
   315	type RecordProof []Hash
   316	
   317	// ProveRecord returns the proof that the tree of size t contains the record with index n.
   318	func ProveRecord(t, n int64, r HashReader) (RecordProof, error) {
   319		if t < 0 || n < 0 || n >= t {
   320			return nil, fmt.Errorf("tlog: invalid inputs in ProveRecord")
   321		}
   322		indexes := leafProofIndex(0, t, n, nil)
   323		if len(indexes) == 0 {
   324			return RecordProof{}, nil
   325		}
   326		hashes, err := r.ReadHashes(indexes)
   327		if err != nil {
   328			return nil, err
   329		}
   330		if len(hashes) != len(indexes) {
   331			return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
   332		}
   333	
   334		p, hashes := leafProof(0, t, n, hashes)
   335		if len(hashes) != 0 {
   336			panic("tlog: bad index math in ProveRecord")
   337		}
   338		return p, nil
   339	}
   340	
   341	// leafProofIndex builds the list of indexes needed to construct the proof
   342	// that leaf n is contained in the subtree with leaves [lo, hi).
   343	// It appends those indexes to need and returns the result.
   344	// See https://tools.ietf.org/html/rfc6962#section-2.1.1
   345	func leafProofIndex(lo, hi, n int64, need []int64) []int64 {
   346		// See leafProof below for commentary.
   347		if !(lo <= n && n < hi) {
   348			panic("tlog: bad math in leafProofIndex")
   349		}
   350		if lo+1 == hi {
   351			return need
   352		}
   353		if k, _ := maxpow2(hi - lo); n < lo+k {
   354			need = leafProofIndex(lo, lo+k, n, need)
   355			need = subTreeIndex(lo+k, hi, need)
   356		} else {
   357			need = subTreeIndex(lo, lo+k, need)
   358			need = leafProofIndex(lo+k, hi, n, need)
   359		}
   360		return need
   361	}
   362	
   363	// leafProof constructs the proof that leaf n is contained in the subtree with leaves [lo, hi).
   364	// It returns any leftover hashes as well.
   365	// See https://tools.ietf.org/html/rfc6962#section-2.1.1
   366	func leafProof(lo, hi, n int64, hashes []Hash) (RecordProof, []Hash) {
   367		// We must have lo <= n < hi or else the code here has a bug.
   368		if !(lo <= n && n < hi) {
   369			panic("tlog: bad math in leafProof")
   370		}
   371	
   372		if lo+1 == hi { // n == lo
   373			// Reached the leaf node.
   374			// The verifier knows what the leaf hash is, so we don't need to send it.
   375			return RecordProof{}, hashes
   376		}
   377	
   378		// Walk down the tree toward n.
   379		// Record the hash of the path not taken (needed for verifying the proof).
   380		var p RecordProof
   381		var th Hash
   382		if k, _ := maxpow2(hi - lo); n < lo+k {
   383			// n is on left side
   384			p, hashes = leafProof(lo, lo+k, n, hashes)
   385			th, hashes = subTreeHash(lo+k, hi, hashes)
   386		} else {
   387			// n is on right side
   388			th, hashes = subTreeHash(lo, lo+k, hashes)
   389			p, hashes = leafProof(lo+k, hi, n, hashes)
   390		}
   391		return append(p, th), hashes
   392	}
   393	
   394	var errProofFailed = errors.New("invalid transparency proof")
   395	
   396	// CheckRecord verifies that p is a valid proof that the tree of size t
   397	// with hash th has an n'th record with hash h.
   398	func CheckRecord(p RecordProof, t int64, th Hash, n int64, h Hash) error {
   399		if t < 0 || n < 0 || n >= t {
   400			return fmt.Errorf("tlog: invalid inputs in CheckRecord")
   401		}
   402		th2, err := runRecordProof(p, 0, t, n, h)
   403		if err != nil {
   404			return err
   405		}
   406		if th2 == th {
   407			return nil
   408		}
   409		return errProofFailed
   410	}
   411	
   412	// runRecordProof runs the proof p that leaf n is contained in the subtree with leaves [lo, hi).
   413	// Running the proof means constructing and returning the implied hash of that
   414	// subtree.
   415	func runRecordProof(p RecordProof, lo, hi, n int64, leafHash Hash) (Hash, error) {
   416		// We must have lo <= n < hi or else the code here has a bug.
   417		if !(lo <= n && n < hi) {
   418			panic("tlog: bad math in runRecordProof")
   419		}
   420	
   421		if lo+1 == hi { // m == lo
   422			// Reached the leaf node.
   423			// The proof must not have any unnecessary hashes.
   424			if len(p) != 0 {
   425				return Hash{}, errProofFailed
   426			}
   427			return leafHash, nil
   428		}
   429	
   430		if len(p) == 0 {
   431			return Hash{}, errProofFailed
   432		}
   433	
   434		k, _ := maxpow2(hi - lo)
   435		if n < lo+k {
   436			th, err := runRecordProof(p[:len(p)-1], lo, lo+k, n, leafHash)
   437			if err != nil {
   438				return Hash{}, err
   439			}
   440			return NodeHash(th, p[len(p)-1]), nil
   441		} else {
   442			th, err := runRecordProof(p[:len(p)-1], lo+k, hi, n, leafHash)
   443			if err != nil {
   444				return Hash{}, err
   445			}
   446			return NodeHash(p[len(p)-1], th), nil
   447		}
   448	}
   449	
   450	// A TreeProof is a verifiable proof that a particular log tree contains
   451	// as a prefix all records present in an earlier tree.
   452	// RFC 6962 calls this a “Merkle consistency proof.”
   453	type TreeProof []Hash
   454	
   455	// ProveTree returns the proof that the tree of size t contains
   456	// as a prefix all the records from the tree of smaller size n.
   457	func ProveTree(t, n int64, h HashReader) (TreeProof, error) {
   458		if t < 1 || n < 1 || n > t {
   459			return nil, fmt.Errorf("tlog: invalid inputs in ProveTree")
   460		}
   461		indexes := treeProofIndex(0, t, n, nil)
   462		if len(indexes) == 0 {
   463			return TreeProof{}, nil
   464		}
   465		hashes, err := h.ReadHashes(indexes)
   466		if err != nil {
   467			return nil, err
   468		}
   469		if len(hashes) != len(indexes) {
   470			return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
   471		}
   472	
   473		p, hashes := treeProof(0, t, n, hashes)
   474		if len(hashes) != 0 {
   475			panic("tlog: bad index math in ProveTree")
   476		}
   477		return p, nil
   478	}
   479	
   480	// treeProofIndex builds the list of indexes needed to construct
   481	// the sub-proof related to the subtree containing records [lo, hi).
   482	// See https://tools.ietf.org/html/rfc6962#section-2.1.2.
   483	func treeProofIndex(lo, hi, n int64, need []int64) []int64 {
   484		// See treeProof below for commentary.
   485		if !(lo < n && n <= hi) {
   486			panic("tlog: bad math in treeProofIndex")
   487		}
   488	
   489		if n == hi {
   490			if lo == 0 {
   491				return need
   492			}
   493			return subTreeIndex(lo, hi, need)
   494		}
   495	
   496		if k, _ := maxpow2(hi - lo); n <= lo+k {
   497			need = treeProofIndex(lo, lo+k, n, need)
   498			need = subTreeIndex(lo+k, hi, need)
   499		} else {
   500			need = subTreeIndex(lo, lo+k, need)
   501			need = treeProofIndex(lo+k, hi, n, need)
   502		}
   503		return need
   504	}
   505	
   506	// treeProof constructs the sub-proof related to the subtree containing records [lo, hi).
   507	// It returns any leftover hashes as well.
   508	// See https://tools.ietf.org/html/rfc6962#section-2.1.2.
   509	func treeProof(lo, hi, n int64, hashes []Hash) (TreeProof, []Hash) {
   510		// We must have lo < n <= hi or else the code here has a bug.
   511		if !(lo < n && n <= hi) {
   512			panic("tlog: bad math in treeProof")
   513		}
   514	
   515		// Reached common ground.
   516		if n == hi {
   517			if lo == 0 {
   518				// This subtree corresponds exactly to the old tree.
   519				// The verifier knows that hash, so we don't need to send it.
   520				return TreeProof{}, hashes
   521			}
   522			th, hashes := subTreeHash(lo, hi, hashes)
   523			return TreeProof{th}, hashes
   524		}
   525	
   526		// Interior node for the proof.
   527		// Decide whether to walk down the left or right side.
   528		var p TreeProof
   529		var th Hash
   530		if k, _ := maxpow2(hi - lo); n <= lo+k {
   531			// m is on left side
   532			p, hashes = treeProof(lo, lo+k, n, hashes)
   533			th, hashes = subTreeHash(lo+k, hi, hashes)
   534		} else {
   535			// m is on right side
   536			th, hashes = subTreeHash(lo, lo+k, hashes)
   537			p, hashes = treeProof(lo+k, hi, n, hashes)
   538		}
   539		return append(p, th), hashes
   540	}
   541	
   542	// CheckTree verifies that p is a valid proof that the tree of size t with hash th
   543	// contains as a prefix the tree of size n with hash h.
   544	func CheckTree(p TreeProof, t int64, th Hash, n int64, h Hash) error {
   545		if t < 1 || n < 1 || n > t {
   546			return fmt.Errorf("tlog: invalid inputs in CheckTree")
   547		}
   548		h2, th2, err := runTreeProof(p, 0, t, n, h)
   549		if err != nil {
   550			return err
   551		}
   552		if th2 == th && h2 == h {
   553			return nil
   554		}
   555		return errProofFailed
   556	}
   557	
   558	// runTreeProof runs the sub-proof p related to the subtree containing records [lo, hi),
   559	// where old is the hash of the old tree with n records.
   560	// Running the proof means constructing and returning the implied hashes of that
   561	// subtree in both the old and new tree.
   562	func runTreeProof(p TreeProof, lo, hi, n int64, old Hash) (Hash, Hash, error) {
   563		// We must have lo < n <= hi or else the code here has a bug.
   564		if !(lo < n && n <= hi) {
   565			panic("tlog: bad math in runTreeProof")
   566		}
   567	
   568		// Reached common ground.
   569		if n == hi {
   570			if lo == 0 {
   571				if len(p) != 0 {
   572					return Hash{}, Hash{}, errProofFailed
   573				}
   574				return old, old, nil
   575			}
   576			if len(p) != 1 {
   577				return Hash{}, Hash{}, errProofFailed
   578			}
   579			return p[0], p[0], nil
   580		}
   581	
   582		if len(p) == 0 {
   583			return Hash{}, Hash{}, errProofFailed
   584		}
   585	
   586		// Interior node for the proof.
   587		k, _ := maxpow2(hi - lo)
   588		if n <= lo+k {
   589			oh, th, err := runTreeProof(p[:len(p)-1], lo, lo+k, n, old)
   590			if err != nil {
   591				return Hash{}, Hash{}, err
   592			}
   593			return oh, NodeHash(th, p[len(p)-1]), nil
   594		} else {
   595			oh, th, err := runTreeProof(p[:len(p)-1], lo+k, hi, n, old)
   596			if err != nil {
   597				return Hash{}, Hash{}, err
   598			}
   599			return NodeHash(p[len(p)-1], oh), NodeHash(p[len(p)-1], th), nil
   600		}
   601	}
   602
View as plain text