Source file src/pkg/archive/tar/strconv.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package tar
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"strconv"
    11		"strings"
    12		"time"
    13	)
    14	
    15	// hasNUL reports whether the NUL character exists within s.
    16	func hasNUL(s string) bool {
    17		return strings.IndexByte(s, 0) >= 0
    18	}
    19	
    20	// isASCII reports whether the input is an ASCII C-style string.
    21	func isASCII(s string) bool {
    22		for _, c := range s {
    23			if c >= 0x80 || c == 0x00 {
    24				return false
    25			}
    26		}
    27		return true
    28	}
    29	
    30	// toASCII converts the input to an ASCII C-style string.
    31	// This a best effort conversion, so invalid characters are dropped.
    32	func toASCII(s string) string {
    33		if isASCII(s) {
    34			return s
    35		}
    36		b := make([]byte, 0, len(s))
    37		for _, c := range s {
    38			if c < 0x80 && c != 0x00 {
    39				b = append(b, byte(c))
    40			}
    41		}
    42		return string(b)
    43	}
    44	
// parser is the receiver for the header-field decoding helpers in this file.
// Methods that can fail (parseNumeric, parseOctal) do not return an error;
// they store ErrHeader in err instead. None of the methods in this file ever
// clears err.
type parser struct {
	err error // Last error seen
}
    48	
// formatter is the receiver for the header-field encoding helpers in this
// file. Its methods (formatString, formatNumeric, formatOctal) do not return
// an error; when a value does not fit in the destination field they store
// ErrFieldTooLong in err instead. None of the methods in this file ever
// clears err.
type formatter struct {
	err error // Last error seen
}
    52	
    53	// parseString parses bytes as a NUL-terminated C-style string.
    54	// If a NUL byte is not found then the whole slice is returned as a string.
    55	func (*parser) parseString(b []byte) string {
    56		if i := bytes.IndexByte(b, 0); i >= 0 {
    57			return string(b[:i])
    58		}
    59		return string(b)
    60	}
    61	
    62	// formatString copies s into b, NUL-terminating if possible.
    63	func (f *formatter) formatString(b []byte, s string) {
    64		if len(s) > len(b) {
    65			f.err = ErrFieldTooLong
    66		}
    67		copy(b, s)
    68		if len(s) < len(b) {
    69			b[len(s)] = 0
    70		}
    71	
    72		// Some buggy readers treat regular files with a trailing slash
    73		// in the V7 path field as a directory even though the full path
    74		// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
    75		if len(s) > len(b) && b[len(b)-1] == '/' {
    76			n := len(strings.TrimRight(s[:len(b)], "/"))
    77			b[n] = 0 // Replace trailing slash with NUL terminator
    78		}
    79	}
    80	
    81	// fitsInBase256 reports whether x can be encoded into n bytes using base-256
    82	// encoding. Unlike octal encoding, base-256 encoding does not require that the
    83	// string ends with a NUL character. Thus, all n bytes are available for output.
    84	//
    85	// If operating in binary mode, this assumes strict GNU binary mode; which means
    86	// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
    87	// equivalent to the sign bit in two's complement form.
    88	func fitsInBase256(n int, x int64) bool {
    89		binBits := uint(n-1) * 8
    90		return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
    91	}
    92	
    93	// parseNumeric parses the input as being encoded in either base-256 or octal.
    94	// This function may return negative numbers.
    95	// If parsing fails or an integer overflow occurs, err will be set.
    96	func (p *parser) parseNumeric(b []byte) int64 {
    97		// Check for base-256 (binary) format first.
    98		// If the first bit is set, then all following bits constitute a two's
    99		// complement encoded number in big-endian byte order.
   100		if len(b) > 0 && b[0]&0x80 != 0 {
   101			// Handling negative numbers relies on the following identity:
   102			//	-a-1 == ^a
   103			//
   104			// If the number is negative, we use an inversion mask to invert the
   105			// data bytes and treat the value as an unsigned number.
   106			var inv byte // 0x00 if positive or zero, 0xff if negative
   107			if b[0]&0x40 != 0 {
   108				inv = 0xff
   109			}
   110	
   111			var x uint64
   112			for i, c := range b {
   113				c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
   114				if i == 0 {
   115					c &= 0x7f // Ignore signal bit in first byte
   116				}
   117				if (x >> 56) > 0 {
   118					p.err = ErrHeader // Integer overflow
   119					return 0
   120				}
   121				x = x<<8 | uint64(c)
   122			}
   123			if (x >> 63) > 0 {
   124				p.err = ErrHeader // Integer overflow
   125				return 0
   126			}
   127			if inv == 0xff {
   128				return ^int64(x)
   129			}
   130			return int64(x)
   131		}
   132	
   133		// Normal case is base-8 (octal) format.
   134		return p.parseOctal(b)
   135	}
   136	
   137	// formatNumeric encodes x into b using base-8 (octal) encoding if possible.
   138	// Otherwise it will attempt to use base-256 (binary) encoding.
   139	func (f *formatter) formatNumeric(b []byte, x int64) {
   140		if fitsInOctal(len(b), x) {
   141			f.formatOctal(b, x)
   142			return
   143		}
   144	
   145		if fitsInBase256(len(b), x) {
   146			for i := len(b) - 1; i >= 0; i-- {
   147				b[i] = byte(x)
   148				x >>= 8
   149			}
   150			b[0] |= 0x80 // Highest bit indicates binary format
   151			return
   152		}
   153	
   154		f.formatOctal(b, 0) // Last resort, just write zero
   155		f.err = ErrFieldTooLong
   156	}
   157	
   158	func (p *parser) parseOctal(b []byte) int64 {
   159		// Because unused fields are filled with NULs, we need
   160		// to skip leading NULs. Fields may also be padded with
   161		// spaces or NULs.
   162		// So we remove leading and trailing NULs and spaces to
   163		// be sure.
   164		b = bytes.Trim(b, " \x00")
   165	
   166		if len(b) == 0 {
   167			return 0
   168		}
   169		x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
   170		if perr != nil {
   171			p.err = ErrHeader
   172		}
   173		return int64(x)
   174	}
   175	
   176	func (f *formatter) formatOctal(b []byte, x int64) {
   177		if !fitsInOctal(len(b), x) {
   178			x = 0 // Last resort, just write zero
   179			f.err = ErrFieldTooLong
   180		}
   181	
   182		s := strconv.FormatInt(x, 8)
   183		// Add leading zeros, but leave room for a NUL.
   184		if n := len(b) - len(s) - 1; n > 0 {
   185			s = strings.Repeat("0", n) + s
   186		}
   187		f.formatString(b, s)
   188	}
   189	
   190	// fitsInOctal reports whether the integer x fits in a field n-bytes long
   191	// using octal encoding with the appropriate NUL terminator.
   192	func fitsInOctal(n int, x int64) bool {
   193		octBits := uint(n-1) * 3
   194		return x >= 0 && (n >= 22 || x < 1<<octBits)
   195	}
   196	
   197	// parsePAXTime takes a string of the form %d.%d as described in the PAX
   198	// specification. Note that this implementation allows for negative timestamps,
   199	// which is allowed for by the PAX specification, but not always portable.
   200	func parsePAXTime(s string) (time.Time, error) {
   201		const maxNanoSecondDigits = 9
   202	
   203		// Split string into seconds and sub-seconds parts.
   204		ss, sn := s, ""
   205		if pos := strings.IndexByte(s, '.'); pos >= 0 {
   206			ss, sn = s[:pos], s[pos+1:]
   207		}
   208	
   209		// Parse the seconds.
   210		secs, err := strconv.ParseInt(ss, 10, 64)
   211		if err != nil {
   212			return time.Time{}, ErrHeader
   213		}
   214		if len(sn) == 0 {
   215			return time.Unix(secs, 0), nil // No sub-second values
   216		}
   217	
   218		// Parse the nanoseconds.
   219		if strings.Trim(sn, "0123456789") != "" {
   220			return time.Time{}, ErrHeader
   221		}
   222		if len(sn) < maxNanoSecondDigits {
   223			sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
   224		} else {
   225			sn = sn[:maxNanoSecondDigits] // Right truncate
   226		}
   227		nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
   228		if len(ss) > 0 && ss[0] == '-' {
   229			return time.Unix(secs, -1*nsecs), nil // Negative correction
   230		}
   231		return time.Unix(secs, nsecs), nil
   232	}
   233	
   234	// formatPAXTime converts ts into a time of the form %d.%d as described in the
   235	// PAX specification. This function is capable of negative timestamps.
   236	func formatPAXTime(ts time.Time) (s string) {
   237		secs, nsecs := ts.Unix(), ts.Nanosecond()
   238		if nsecs == 0 {
   239			return strconv.FormatInt(secs, 10)
   240		}
   241	
   242		// If seconds is negative, then perform correction.
   243		sign := ""
   244		if secs < 0 {
   245			sign = "-"             // Remember sign
   246			secs = -(secs + 1)     // Add a second to secs
   247			nsecs = -(nsecs - 1e9) // Take that second away from nsecs
   248		}
   249		return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0")
   250	}
   251	
   252	// parsePAXRecord parses the input PAX record string into a key-value pair.
   253	// If parsing is successful, it will slice off the currently read record and
   254	// return the remainder as r.
   255	func parsePAXRecord(s string) (k, v, r string, err error) {
   256		// The size field ends at the first space.
   257		sp := strings.IndexByte(s, ' ')
   258		if sp == -1 {
   259			return "", "", s, ErrHeader
   260		}
   261	
   262		// Parse the first token as a decimal integer.
   263		n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
   264		if perr != nil || n < 5 || int64(len(s)) < n {
   265			return "", "", s, ErrHeader
   266		}
   267	
   268		// Extract everything between the space and the final newline.
   269		rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
   270		if nl != "\n" {
   271			return "", "", s, ErrHeader
   272		}
   273	
   274		// The first equals separates the key from the value.
   275		eq := strings.IndexByte(rec, '=')
   276		if eq == -1 {
   277			return "", "", s, ErrHeader
   278		}
   279		k, v = rec[:eq], rec[eq+1:]
   280	
   281		if !validPAXRecord(k, v) {
   282			return "", "", s, ErrHeader
   283		}
   284		return k, v, rem, nil
   285	}
   286	
   287	// formatPAXRecord formats a single PAX record, prefixing it with the
   288	// appropriate length.
   289	func formatPAXRecord(k, v string) (string, error) {
   290		if !validPAXRecord(k, v) {
   291			return "", ErrHeader
   292		}
   293	
   294		const padding = 3 // Extra padding for ' ', '=', and '\n'
   295		size := len(k) + len(v) + padding
   296		size += len(strconv.Itoa(size))
   297		record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
   298	
   299		// Final adjustment if adding size field increased the record size.
   300		if len(record) != size {
   301			size = len(record)
   302			record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
   303		}
   304		return record, nil
   305	}
   306	
   307	// validPAXRecord reports whether the key-value pair is valid where each
   308	// record is formatted as:
   309	//	"%d %s=%s\n" % (size, key, value)
   310	//
   311	// Keys and values should be UTF-8, but the number of bad writers out there
   312	// forces us to be a more liberal.
   313	// Thus, we only reject all keys with NUL, and only reject NULs in values
   314	// for the PAX version of the USTAR string fields.
   315	// The key must not contain an '=' character.
   316	func validPAXRecord(k, v string) bool {
   317		if k == "" || strings.IndexByte(k, '=') >= 0 {
   318			return false
   319		}
   320		switch k {
   321		case paxPath, paxLinkpath, paxUname, paxGname:
   322			return !hasNUL(v)
   323		default:
   324			return !hasNUL(k)
   325		}
   326	}
   327
View as plain text