...

Source file src/pkg/archive/tar/format.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package tar
     6	
     7	import "strings"
     8	
// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with its own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The table's upper portion shows the Header fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
//
// The Writer currently provides no support for sparse files.
type Format int
    47	
// Constants to identify various tar formats.
//
// Every format other than FormatUnknown occupies its own bit, so a single
// Format value can also hold a set of candidate formats; the has, mayBe,
// mayOnlyBe, and mustNotBe methods operate on such sets.
const (
	// Deliberately hide the meaning of constants from public API.
	_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...

	// FormatUnknown indicates that the format is unknown.
	// Its value is zero, i.e., no format bit is set.
	FormatUnknown

	// The format of the original Unix V7 tar tool prior to standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// Schily's tar format, which is incompatible with USTAR.
	// This does not cover STAR extensions to the PAX format; these fall under
	// the PAX format.
	formatSTAR

	// formatMax is the bit just past the last defined format.
	// Format.String uses it as the exclusive upper bound when walking
	// through the individual format bits.
	formatMax
)
   107	
   108	func (f Format) has(f2 Format) bool   { return f&f2 != 0 }
   109	func (f *Format) mayBe(f2 Format)     { *f |= f2 }
   110	func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
   111	func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }
   112	
   113	var formatNames = map[Format]string{
   114		formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
   115	}
   116	
   117	func (f Format) String() string {
   118		var ss []string
   119		for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
   120			if f.has(f2) {
   121				ss = append(ss, formatNames[f2])
   122			}
   123		}
   124		switch len(ss) {
   125		case 0:
   126			return "<unknown>"
   127		case 1:
   128			return ss[0]
   129		default:
   130			return "(" + strings.Join(ss, " | ") + ")"
   131		}
   132	}
   133	
   134	// Magics used to identify various formats.
   135	const (
   136		magicGNU, versionGNU     = "ustar ", " \x00"
   137		magicUSTAR, versionUSTAR = "ustar\x00", "00"
   138		trailerSTAR              = "tar\x00"
   139	)
   140	
   141	// Size constants from various tar specifications.
   142	const (
   143		blockSize  = 512 // Size of each block in a tar stream
   144		nameSize   = 100 // Max length of the name field in USTAR format
   145		prefixSize = 155 // Max length of the prefix field in USTAR format
   146	)
   147	
   148	// blockPadding computes the number of bytes needed to pad offset up to the
   149	// nearest block edge where 0 <= n < blockSize.
   150	func blockPadding(offset int64) (n int64) {
   151		return -offset & (blockSize - 1)
   152	}
   153	
   154	var zeroBlock block
   155	
   156	type block [blockSize]byte
   157	
   158	// Convert block to any number of formats.
   159	func (b *block) V7() *headerV7       { return (*headerV7)(b) }
   160	func (b *block) GNU() *headerGNU     { return (*headerGNU)(b) }
   161	func (b *block) STAR() *headerSTAR   { return (*headerSTAR)(b) }
   162	func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
   163	func (b *block) Sparse() sparseArray { return sparseArray(b[:]) }
   164	
   165	// GetFormat checks that the block is a valid tar header based on the checksum.
   166	// It then attempts to guess the specific format based on magic values.
   167	// If the checksum fails, then FormatUnknown is returned.
   168	func (b *block) GetFormat() Format {
   169		// Verify checksum.
   170		var p parser
   171		value := p.parseOctal(b.V7().Chksum())
   172		chksum1, chksum2 := b.ComputeChecksum()
   173		if p.err != nil || (value != chksum1 && value != chksum2) {
   174			return FormatUnknown
   175		}
   176	
   177		// Guess the magic values.
   178		magic := string(b.USTAR().Magic())
   179		version := string(b.USTAR().Version())
   180		trailer := string(b.STAR().Trailer())
   181		switch {
   182		case magic == magicUSTAR && trailer == trailerSTAR:
   183			return formatSTAR
   184		case magic == magicUSTAR:
   185			return FormatUSTAR | FormatPAX
   186		case magic == magicGNU && version == versionGNU:
   187			return FormatGNU
   188		default:
   189			return formatV7
   190		}
   191	}
   192	
   193	// SetFormat writes the magic values necessary for specified format
   194	// and then updates the checksum accordingly.
   195	func (b *block) SetFormat(format Format) {
   196		// Set the magic values.
   197		switch {
   198		case format.has(formatV7):
   199			// Do nothing.
   200		case format.has(FormatGNU):
   201			copy(b.GNU().Magic(), magicGNU)
   202			copy(b.GNU().Version(), versionGNU)
   203		case format.has(formatSTAR):
   204			copy(b.STAR().Magic(), magicUSTAR)
   205			copy(b.STAR().Version(), versionUSTAR)
   206			copy(b.STAR().Trailer(), trailerSTAR)
   207		case format.has(FormatUSTAR | FormatPAX):
   208			copy(b.USTAR().Magic(), magicUSTAR)
   209			copy(b.USTAR().Version(), versionUSTAR)
   210		default:
   211			panic("invalid format")
   212		}
   213	
   214		// Update checksum.
   215		// This field is special in that it is terminated by a NULL then space.
   216		var f formatter
   217		field := b.V7().Chksum()
   218		chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
   219		f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
   220		field[7] = ' '
   221	}
   222	
   223	// ComputeChecksum computes the checksum for the header block.
   224	// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
   225	// signed byte values.
   226	// We compute and return both.
   227	func (b *block) ComputeChecksum() (unsigned, signed int64) {
   228		for i, c := range b {
   229			if 148 <= i && i < 156 {
   230				c = ' ' // Treat the checksum field itself as all spaces.
   231			}
   232			unsigned += int64(c)
   233			signed += int64(int8(c))
   234		}
   235		return unsigned, signed
   236	}
   237	
   238	// Reset clears the block with all zeros.
   239	func (b *block) Reset() {
   240		*b = block{}
   241	}
   242	
   243	type headerV7 [blockSize]byte
   244	
   245	func (h *headerV7) Name() []byte     { return h[000:][:100] }
   246	func (h *headerV7) Mode() []byte     { return h[100:][:8] }
   247	func (h *headerV7) UID() []byte      { return h[108:][:8] }
   248	func (h *headerV7) GID() []byte      { return h[116:][:8] }
   249	func (h *headerV7) Size() []byte     { return h[124:][:12] }
   250	func (h *headerV7) ModTime() []byte  { return h[136:][:12] }
   251	func (h *headerV7) Chksum() []byte   { return h[148:][:8] }
   252	func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
   253	func (h *headerV7) LinkName() []byte { return h[157:][:100] }
   254	
   255	type headerGNU [blockSize]byte
   256	
   257	func (h *headerGNU) V7() *headerV7       { return (*headerV7)(h) }
   258	func (h *headerGNU) Magic() []byte       { return h[257:][:6] }
   259	func (h *headerGNU) Version() []byte     { return h[263:][:2] }
   260	func (h *headerGNU) UserName() []byte    { return h[265:][:32] }
   261	func (h *headerGNU) GroupName() []byte   { return h[297:][:32] }
   262	func (h *headerGNU) DevMajor() []byte    { return h[329:][:8] }
   263	func (h *headerGNU) DevMinor() []byte    { return h[337:][:8] }
   264	func (h *headerGNU) AccessTime() []byte  { return h[345:][:12] }
   265	func (h *headerGNU) ChangeTime() []byte  { return h[357:][:12] }
   266	func (h *headerGNU) Sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) }
   267	func (h *headerGNU) RealSize() []byte    { return h[483:][:12] }
   268	
   269	type headerSTAR [blockSize]byte
   270	
   271	func (h *headerSTAR) V7() *headerV7      { return (*headerV7)(h) }
   272	func (h *headerSTAR) Magic() []byte      { return h[257:][:6] }
   273	func (h *headerSTAR) Version() []byte    { return h[263:][:2] }
   274	func (h *headerSTAR) UserName() []byte   { return h[265:][:32] }
   275	func (h *headerSTAR) GroupName() []byte  { return h[297:][:32] }
   276	func (h *headerSTAR) DevMajor() []byte   { return h[329:][:8] }
   277	func (h *headerSTAR) DevMinor() []byte   { return h[337:][:8] }
   278	func (h *headerSTAR) Prefix() []byte     { return h[345:][:131] }
   279	func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
   280	func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
   281	func (h *headerSTAR) Trailer() []byte    { return h[508:][:4] }
   282	
   283	type headerUSTAR [blockSize]byte
   284	
   285	func (h *headerUSTAR) V7() *headerV7     { return (*headerV7)(h) }
   286	func (h *headerUSTAR) Magic() []byte     { return h[257:][:6] }
   287	func (h *headerUSTAR) Version() []byte   { return h[263:][:2] }
   288	func (h *headerUSTAR) UserName() []byte  { return h[265:][:32] }
   289	func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
   290	func (h *headerUSTAR) DevMajor() []byte  { return h[329:][:8] }
   291	func (h *headerUSTAR) DevMinor() []byte  { return h[337:][:8] }
   292	func (h *headerUSTAR) Prefix() []byte    { return h[345:][:155] }
   293	
   294	type sparseArray []byte
   295	
   296	func (s sparseArray) Entry(i int) sparseElem { return sparseElem(s[i*24:]) }
   297	func (s sparseArray) IsExtended() []byte     { return s[24*s.MaxEntries():][:1] }
   298	func (s sparseArray) MaxEntries() int        { return len(s) / 24 }
   299	
   300	type sparseElem []byte
   301	
   302	func (s sparseElem) Offset() []byte { return s[00:][:12] }
   303	func (s sparseElem) Length() []byte { return s[12:][:12] }
   304	

View as plain text