...

Source file src/strconv/atoi.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package strconv
     6	
     7	import "errors"
     8	
     9	// lower(c) is a lower-case letter if and only if
    10	// c is either that lower-case letter or the equivalent upper-case letter.
    11	// Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
    12	// Note that lower of non-letters can produce other non-letters.
    13	func lower(c byte) byte {
    14		return c | ('x' - 'X')
    15	}
    16	
    17	// ErrRange indicates that a value is out of range for the target type.
    18	var ErrRange = errors.New("value out of range")
    19	
    20	// ErrSyntax indicates that a value does not have the right syntax for the target type.
    21	var ErrSyntax = errors.New("invalid syntax")
    22	
// A NumError records a failed conversion.
//
// The constructors in this file (syntaxError, rangeError, baseError,
// bitSizeError) return it by pointer. ParseInt and Atoi overwrite Func
// (and ParseInt also Num) on an error obtained from a lower-level parser
// so that the fields describe the function the caller actually invoked.
type NumError struct {
	Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat)
	Num  string // the input
	Err  error  // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
}
    29	
    30	func (e *NumError) Error() string {
    31		return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
    32	}
    33	
    34	func syntaxError(fn, str string) *NumError {
    35		return &NumError{fn, str, ErrSyntax}
    36	}
    37	
    38	func rangeError(fn, str string) *NumError {
    39		return &NumError{fn, str, ErrRange}
    40	}
    41	
    42	func baseError(fn, str string, base int) *NumError {
    43		return &NumError{fn, str, errors.New("invalid base " + Itoa(base))}
    44	}
    45	
    46	func bitSizeError(fn, str string, bitSize int) *NumError {
    47		return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))}
    48	}
    49	
    50	const intSize = 32 << (^uint(0) >> 63)
    51	
    52	// IntSize is the size in bits of an int or uint value.
    53	const IntSize = intSize
    54	
    55	const maxUint64 = 1<<64 - 1
    56	
    57	// ParseUint is like ParseInt but for unsigned numbers.
    58	func ParseUint(s string, base int, bitSize int) (uint64, error) {
    59		const fnParseUint = "ParseUint"
    60	
    61		if s == "" || !underscoreOK(s) {
    62			return 0, syntaxError(fnParseUint, s)
    63		}
    64	
    65		base0 := base == 0
    66	
    67		s0 := s
    68		switch {
    69		case 2 <= base && base <= 36:
    70			// valid base; nothing to do
    71	
    72		case base == 0:
    73			// Look for octal, hex prefix.
    74			base = 10
    75			if s[0] == '0' {
    76				switch {
    77				case len(s) >= 3 && lower(s[1]) == 'b':
    78					base = 2
    79					s = s[2:]
    80				case len(s) >= 3 && lower(s[1]) == 'o':
    81					base = 8
    82					s = s[2:]
    83				case len(s) >= 3 && lower(s[1]) == 'x':
    84					base = 16
    85					s = s[2:]
    86				default:
    87					base = 8
    88					s = s[1:]
    89				}
    90			}
    91	
    92		default:
    93			return 0, baseError(fnParseUint, s0, base)
    94		}
    95	
    96		if bitSize == 0 {
    97			bitSize = int(IntSize)
    98		} else if bitSize < 0 || bitSize > 64 {
    99			return 0, bitSizeError(fnParseUint, s0, bitSize)
   100		}
   101	
   102		// Cutoff is the smallest number such that cutoff*base > maxUint64.
   103		// Use compile-time constants for common cases.
   104		var cutoff uint64
   105		switch base {
   106		case 10:
   107			cutoff = maxUint64/10 + 1
   108		case 16:
   109			cutoff = maxUint64/16 + 1
   110		default:
   111			cutoff = maxUint64/uint64(base) + 1
   112		}
   113	
   114		maxVal := uint64(1)<<uint(bitSize) - 1
   115	
   116		var n uint64
   117		for _, c := range []byte(s) {
   118			var d byte
   119			switch {
   120			case c == '_' && base0:
   121				// underscoreOK already called
   122				continue
   123			case '0' <= c && c <= '9':
   124				d = c - '0'
   125			case 'a' <= lower(c) && lower(c) <= 'z':
   126				d = lower(c) - 'a' + 10
   127			default:
   128				return 0, syntaxError(fnParseUint, s0)
   129			}
   130	
   131			if d >= byte(base) {
   132				return 0, syntaxError(fnParseUint, s0)
   133			}
   134	
   135			if n >= cutoff {
   136				// n*base overflows
   137				return maxVal, rangeError(fnParseUint, s0)
   138			}
   139			n *= uint64(base)
   140	
   141			n1 := n + uint64(d)
   142			if n1 < n || n1 > maxVal {
   143				// n+v overflows
   144				return maxVal, rangeError(fnParseUint, s0)
   145			}
   146			n = n1
   147		}
   148	
   149		return n, nil
   150	}
   151	
   152	// ParseInt interprets a string s in the given base (0, 2 to 36) and
   153	// bit size (0 to 64) and returns the corresponding value i.
   154	//
   155	// If base == 0, the base is implied by the string's prefix:
   156	// base 2 for "0b", base 8 for "0" or "0o", base 16 for "0x",
   157	// and base 10 otherwise. Also, for base == 0 only, underscore
   158	// characters are permitted per the Go integer literal syntax.
   159	// If base is below 0, is 1, or is above 36, an error is returned.
   160	//
   161	// The bitSize argument specifies the integer type
   162	// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   163	// correspond to int, int8, int16, int32, and int64.
   164	// If bitSize is below 0 or above 64, an error is returned.
   165	//
   166	// The errors that ParseInt returns have concrete type *NumError
   167	// and include err.Num = s. If s is empty or contains invalid
   168	// digits, err.Err = ErrSyntax and the returned value is 0;
   169	// if the value corresponding to s cannot be represented by a
   170	// signed integer of the given size, err.Err = ErrRange and the
   171	// returned value is the maximum magnitude integer of the
   172	// appropriate bitSize and sign.
   173	func ParseInt(s string, base int, bitSize int) (i int64, err error) {
   174		const fnParseInt = "ParseInt"
   175	
   176		if s == "" {
   177			return 0, syntaxError(fnParseInt, s)
   178		}
   179	
   180		// Pick off leading sign.
   181		s0 := s
   182		neg := false
   183		if s[0] == '+' {
   184			s = s[1:]
   185		} else if s[0] == '-' {
   186			neg = true
   187			s = s[1:]
   188		}
   189	
   190		// Convert unsigned and check range.
   191		var un uint64
   192		un, err = ParseUint(s, base, bitSize)
   193		if err != nil && err.(*NumError).Err != ErrRange {
   194			err.(*NumError).Func = fnParseInt
   195			err.(*NumError).Num = s0
   196			return 0, err
   197		}
   198	
   199		if bitSize == 0 {
   200			bitSize = int(IntSize)
   201		}
   202	
   203		cutoff := uint64(1 << uint(bitSize-1))
   204		if !neg && un >= cutoff {
   205			return int64(cutoff - 1), rangeError(fnParseInt, s0)
   206		}
   207		if neg && un > cutoff {
   208			return -int64(cutoff), rangeError(fnParseInt, s0)
   209		}
   210		n := int64(un)
   211		if neg {
   212			n = -n
   213		}
   214		return n, nil
   215	}
   216	
   217	// Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   218	func Atoi(s string) (int, error) {
   219		const fnAtoi = "Atoi"
   220	
   221		sLen := len(s)
   222		if intSize == 32 && (0 < sLen && sLen < 10) ||
   223			intSize == 64 && (0 < sLen && sLen < 19) {
   224			// Fast path for small integers that fit int type.
   225			s0 := s
   226			if s[0] == '-' || s[0] == '+' {
   227				s = s[1:]
   228				if len(s) < 1 {
   229					return 0, &NumError{fnAtoi, s0, ErrSyntax}
   230				}
   231			}
   232	
   233			n := 0
   234			for _, ch := range []byte(s) {
   235				ch -= '0'
   236				if ch > 9 {
   237					return 0, &NumError{fnAtoi, s0, ErrSyntax}
   238				}
   239				n = n*10 + int(ch)
   240			}
   241			if s0[0] == '-' {
   242				n = -n
   243			}
   244			return n, nil
   245		}
   246	
   247		// Slow path for invalid, big, or underscored integers.
   248		i64, err := ParseInt(s, 10, 0)
   249		if nerr, ok := err.(*NumError); ok {
   250			nerr.Func = fnAtoi
   251		}
   252		return int(i64), err
   253	}
   254	
   255	// underscoreOK reports whether the underscores in s are allowed.
   256	// Checking them in this one function lets all the parsers skip over them simply.
   257	// Underscore must appear only between digits or between a base prefix and a digit.
   258	func underscoreOK(s string) bool {
   259		// saw tracks the last character (class) we saw:
   260		// ^ for beginning of number,
   261		// 0 for a digit or base prefix,
   262		// _ for an underscore,
   263		// ! for none of the above.
   264		saw := '^'
   265		i := 0
   266	
   267		// Optional sign.
   268		if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   269			s = s[1:]
   270		}
   271	
   272		// Optional base prefix.
   273		hex := false
   274		if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   275			i = 2
   276			saw = '0' // base prefix counts as a digit for "underscore as digit separator"
   277			hex = lower(s[1]) == 'x'
   278		}
   279	
   280		// Number proper.
   281		for ; i < len(s); i++ {
   282			// Digits are always okay.
   283			if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   284				saw = '0'
   285				continue
   286			}
   287			// Underscore must follow digit.
   288			if s[i] == '_' {
   289				if saw != '0' {
   290					return false
   291				}
   292				saw = '_'
   293				continue
   294			}
   295			// Underscore must also be followed by digit.
   296			if saw == '_' {
   297				return false
   298			}
   299			// Saw non-digit, non-underscore.
   300			saw = '!'
   301		}
   302		return saw != '_'
   303	}
   304	

View as plain text