...

Source file src/strconv/atof.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package strconv
     6	
     7	// decimal to binary floating point conversion.
     8	// Algorithm:
     9	//   1) Store input in multiprecision decimal.
    10	//   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
    11	//   3) Multiply by 2^precision and round to get mantissa.
    12	
    13	import "math"
    14	
    15	var optimize = true // set to false to force slow-path conversions for testing
    16	
    17	func equalIgnoreCase(s1, s2 string) bool {
    18		if len(s1) != len(s2) {
    19			return false
    20		}
    21		for i := 0; i < len(s1); i++ {
    22			c1 := s1[i]
    23			if 'A' <= c1 && c1 <= 'Z' {
    24				c1 += 'a' - 'A'
    25			}
    26			c2 := s2[i]
    27			if 'A' <= c2 && c2 <= 'Z' {
    28				c2 += 'a' - 'A'
    29			}
    30			if c1 != c2 {
    31				return false
    32			}
    33		}
    34		return true
    35	}
    36	
    37	func special(s string) (f float64, ok bool) {
    38		if len(s) == 0 {
    39			return
    40		}
    41		switch s[0] {
    42		default:
    43			return
    44		case '+':
    45			if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") {
    46				return math.Inf(1), true
    47			}
    48		case '-':
    49			if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") {
    50				return math.Inf(-1), true
    51			}
    52		case 'n', 'N':
    53			if equalIgnoreCase(s, "nan") {
    54				return math.NaN(), true
    55			}
    56		case 'i', 'I':
    57			if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") {
    58				return math.Inf(1), true
    59			}
    60		}
    61		return
    62	}
    63	
    64	func (b *decimal) set(s string) (ok bool) {
    65		i := 0
    66		b.neg = false
    67		b.trunc = false
    68	
    69		// optional sign
    70		if i >= len(s) {
    71			return
    72		}
    73		switch {
    74		case s[i] == '+':
    75			i++
    76		case s[i] == '-':
    77			b.neg = true
    78			i++
    79		}
    80	
    81		// digits
    82		sawdot := false
    83		sawdigits := false
    84		for ; i < len(s); i++ {
    85			switch {
    86			case s[i] == '_':
    87				// underscoreOK already called
    88				continue
    89			case s[i] == '.':
    90				if sawdot {
    91					return
    92				}
    93				sawdot = true
    94				b.dp = b.nd
    95				continue
    96	
    97			case '0' <= s[i] && s[i] <= '9':
    98				sawdigits = true
    99				if s[i] == '0' && b.nd == 0 { // ignore leading zeros
   100					b.dp--
   101					continue
   102				}
   103				if b.nd < len(b.d) {
   104					b.d[b.nd] = s[i]
   105					b.nd++
   106				} else if s[i] != '0' {
   107					b.trunc = true
   108				}
   109				continue
   110			}
   111			break
   112		}
   113		if !sawdigits {
   114			return
   115		}
   116		if !sawdot {
   117			b.dp = b.nd
   118		}
   119	
   120		// optional exponent moves decimal point.
   121		// if we read a very large, very long number,
   122		// just be sure to move the decimal point by
   123		// a lot (say, 100000).  it doesn't matter if it's
   124		// not the exact number.
   125		if i < len(s) && lower(s[i]) == 'e' {
   126			i++
   127			if i >= len(s) {
   128				return
   129			}
   130			esign := 1
   131			if s[i] == '+' {
   132				i++
   133			} else if s[i] == '-' {
   134				i++
   135				esign = -1
   136			}
   137			if i >= len(s) || s[i] < '0' || s[i] > '9' {
   138				return
   139			}
   140			e := 0
   141			for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   142				if s[i] == '_' {
   143					// underscoreOK already called
   144					continue
   145				}
   146				if e < 10000 {
   147					e = e*10 + int(s[i]) - '0'
   148				}
   149			}
   150			b.dp += e * esign
   151		}
   152	
   153		if i != len(s) {
   154			return
   155		}
   156	
   157		ok = true
   158		return
   159	}
   160	
   161	// readFloat reads a decimal mantissa and exponent from a float
   162	// string representation. It returns ok==false if the number could
   163	// not fit return types or is invalid.
   164	func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
   165		i := 0
   166	
   167		// optional sign
   168		if i >= len(s) {
   169			return
   170		}
   171		switch {
   172		case s[i] == '+':
   173			i++
   174		case s[i] == '-':
   175			neg = true
   176			i++
   177		}
   178	
   179		// digits
   180		base := uint64(10)
   181		maxMantDigits := 19 // 10^19 fits in uint64
   182		expChar := byte('e')
   183		if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
   184			base = 16
   185			maxMantDigits = 16 // 16^16 fits in uint64
   186			i += 2
   187			expChar = 'p'
   188			hex = true
   189		}
   190		sawdot := false
   191		sawdigits := false
   192		nd := 0
   193		ndMant := 0
   194		dp := 0
   195		for ; i < len(s); i++ {
   196			switch c := s[i]; true {
   197			case c == '_':
   198				// underscoreOK already called
   199				continue
   200	
   201			case c == '.':
   202				if sawdot {
   203					return
   204				}
   205				sawdot = true
   206				dp = nd
   207				continue
   208	
   209			case '0' <= c && c <= '9':
   210				sawdigits = true
   211				if c == '0' && nd == 0 { // ignore leading zeros
   212					dp--
   213					continue
   214				}
   215				nd++
   216				if ndMant < maxMantDigits {
   217					mantissa *= base
   218					mantissa += uint64(c - '0')
   219					ndMant++
   220				} else if c != '0' {
   221					trunc = true
   222				}
   223				continue
   224	
   225			case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
   226				sawdigits = true
   227				nd++
   228				if ndMant < maxMantDigits {
   229					mantissa *= 16
   230					mantissa += uint64(lower(c) - 'a' + 10)
   231					ndMant++
   232				} else {
   233					trunc = true
   234				}
   235				continue
   236			}
   237			break
   238		}
   239		if !sawdigits {
   240			return
   241		}
   242		if !sawdot {
   243			dp = nd
   244		}
   245	
   246		if base == 16 {
   247			dp *= 4
   248			ndMant *= 4
   249		}
   250	
   251		// optional exponent moves decimal point.
   252		// if we read a very large, very long number,
   253		// just be sure to move the decimal point by
   254		// a lot (say, 100000).  it doesn't matter if it's
   255		// not the exact number.
   256		if i < len(s) && lower(s[i]) == expChar {
   257			i++
   258			if i >= len(s) {
   259				return
   260			}
   261			esign := 1
   262			if s[i] == '+' {
   263				i++
   264			} else if s[i] == '-' {
   265				i++
   266				esign = -1
   267			}
   268			if i >= len(s) || s[i] < '0' || s[i] > '9' {
   269				return
   270			}
   271			e := 0
   272			for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   273				if s[i] == '_' {
   274					// underscoreOK already called
   275					continue
   276				}
   277				if e < 10000 {
   278					e = e*10 + int(s[i]) - '0'
   279				}
   280			}
   281			dp += e * esign
   282		} else if base == 16 {
   283			// Must have exponent.
   284			return
   285		}
   286	
   287		if i != len(s) {
   288			return
   289		}
   290	
   291		if mantissa != 0 {
   292			exp = dp - ndMant
   293		}
   294		ok = true
   295		return
   296	}
   297	
   298	// decimal power of ten to binary power of two.
   299	var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   300	
   301	func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
   302		var exp int
   303		var mant uint64
   304	
   305		// Zero is always a special case.
   306		if d.nd == 0 {
   307			mant = 0
   308			exp = flt.bias
   309			goto out
   310		}
   311	
   312		// Obvious overflow/underflow.
   313		// These bounds are for 64-bit floats.
   314		// Will have to change if we want to support 80-bit floats in the future.
   315		if d.dp > 310 {
   316			goto overflow
   317		}
   318		if d.dp < -330 {
   319			// zero
   320			mant = 0
   321			exp = flt.bias
   322			goto out
   323		}
   324	
   325		// Scale by powers of two until in range [0.5, 1.0)
   326		exp = 0
   327		for d.dp > 0 {
   328			var n int
   329			if d.dp >= len(powtab) {
   330				n = 27
   331			} else {
   332				n = powtab[d.dp]
   333			}
   334			d.Shift(-n)
   335			exp += n
   336		}
   337		for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
   338			var n int
   339			if -d.dp >= len(powtab) {
   340				n = 27
   341			} else {
   342				n = powtab[-d.dp]
   343			}
   344			d.Shift(n)
   345			exp -= n
   346		}
   347	
   348		// Our range is [0.5,1) but floating point range is [1,2).
   349		exp--
   350	
   351		// Minimum representable exponent is flt.bias+1.
   352		// If the exponent is smaller, move it up and
   353		// adjust d accordingly.
   354		if exp < flt.bias+1 {
   355			n := flt.bias + 1 - exp
   356			d.Shift(-n)
   357			exp += n
   358		}
   359	
   360		if exp-flt.bias >= 1<<flt.expbits-1 {
   361			goto overflow
   362		}
   363	
   364		// Extract 1+flt.mantbits bits.
   365		d.Shift(int(1 + flt.mantbits))
   366		mant = d.RoundedInteger()
   367	
   368		// Rounding might have added a bit; shift down.
   369		if mant == 2<<flt.mantbits {
   370			mant >>= 1
   371			exp++
   372			if exp-flt.bias >= 1<<flt.expbits-1 {
   373				goto overflow
   374			}
   375		}
   376	
   377		// Denormalized?
   378		if mant&(1<<flt.mantbits) == 0 {
   379			exp = flt.bias
   380		}
   381		goto out
   382	
   383	overflow:
   384		// ±Inf
   385		mant = 0
   386		exp = 1<<flt.expbits - 1 + flt.bias
   387		overflow = true
   388	
   389	out:
   390		// Assemble bits.
   391		bits := mant & (uint64(1)<<flt.mantbits - 1)
   392		bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   393		if d.neg {
   394			bits |= 1 << flt.mantbits << flt.expbits
   395		}
   396		return bits, overflow
   397	}
   398	
   399	// Exact powers of 10.
   400	var float64pow10 = []float64{
   401		1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
   402		1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
   403		1e20, 1e21, 1e22,
   404	}
   405	var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   406	
   407	// If possible to convert decimal representation to 64-bit float f exactly,
   408	// entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   409	// Three common cases:
   410	//	value is exact integer
   411	//	value is exact integer * exact power of ten
   412	//	value is exact integer / exact power of ten
   413	// These all produce potentially inexact but correctly rounded answers.
   414	func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   415		if mantissa>>float64info.mantbits != 0 {
   416			return
   417		}
   418		f = float64(mantissa)
   419		if neg {
   420			f = -f
   421		}
   422		switch {
   423		case exp == 0:
   424			// an integer.
   425			return f, true
   426		// Exact integers are <= 10^15.
   427		// Exact powers of ten are <= 10^22.
   428		case exp > 0 && exp <= 15+22: // int * 10^k
   429			// If exponent is big but number of digits is not,
   430			// can move a few zeros into the integer part.
   431			if exp > 22 {
   432				f *= float64pow10[exp-22]
   433				exp = 22
   434			}
   435			if f > 1e15 || f < -1e15 {
   436				// the exponent was really too large.
   437				return
   438			}
   439			return f * float64pow10[exp], true
   440		case exp < 0 && exp >= -22: // int / 10^k
   441			return f / float64pow10[-exp], true
   442		}
   443		return
   444	}
   445	
   446	// If possible to compute mantissa*10^exp to 32-bit float f exactly,
   447	// entirely in floating-point math, do so, avoiding the machinery above.
   448	func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   449		if mantissa>>float32info.mantbits != 0 {
   450			return
   451		}
   452		f = float32(mantissa)
   453		if neg {
   454			f = -f
   455		}
   456		switch {
   457		case exp == 0:
   458			return f, true
   459		// Exact integers are <= 10^7.
   460		// Exact powers of ten are <= 10^10.
   461		case exp > 0 && exp <= 7+10: // int * 10^k
   462			// If exponent is big but number of digits is not,
   463			// can move a few zeros into the integer part.
   464			if exp > 10 {
   465				f *= float32pow10[exp-10]
   466				exp = 10
   467			}
   468			if f > 1e7 || f < -1e7 {
   469				// the exponent was really too large.
   470				return
   471			}
   472			return f * float32pow10[exp], true
   473		case exp < 0 && exp >= -10: // int / 10^k
   474			return f / float32pow10[-exp], true
   475		}
   476		return
   477	}
   478	
   479	// atofHex converts the hex floating-point string s
   480	// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
   481	// and returns it as a float64.
   482	// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
   483	// If trunc is true, trailing non-zero bits have been omitted from the mantissa.
   484	func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
   485		maxExp := 1<<flt.expbits + flt.bias - 2
   486		minExp := flt.bias + 1
   487		exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
   488	
   489		// Shift mantissa and exponent to bring representation into float range.
   490		// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
   491		// For rounding, we need two more, where the bottom bit represents
   492		// whether that bit or any later bit was non-zero.
   493		// (If the mantissa has already lost non-zero bits, trunc is true,
   494		// and we OR in a 1 below after shifting left appropriately.)
   495		for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
   496			mantissa <<= 1
   497			exp--
   498		}
   499		if trunc {
   500			mantissa |= 1
   501		}
   502		for mantissa>>(1+flt.mantbits+2) != 0 {
   503			mantissa = mantissa>>1 | mantissa&1
   504			exp++
   505		}
   506	
   507		// If exponent is too negative,
   508		// denormalize in hopes of making it representable.
   509		// (The -2 is for the rounding bits.)
   510		for mantissa > 1 && exp < minExp-2 {
   511			mantissa = mantissa>>1 | mantissa&1
   512			exp++
   513		}
   514	
   515		// Round using two bottom bits.
   516		round := mantissa & 3
   517		mantissa >>= 2
   518		round |= mantissa & 1 // round to even (round up if mantissa is odd)
   519		exp += 2
   520		if round == 3 {
   521			mantissa++
   522			if mantissa == 1<<(1+flt.mantbits) {
   523				mantissa >>= 1
   524				exp++
   525			}
   526		}
   527	
   528		if mantissa>>flt.mantbits == 0 { // Denormal or zero.
   529			exp = flt.bias
   530		}
   531		var err error
   532		if exp > maxExp { // infinity and range error
   533			mantissa = 1 << flt.mantbits
   534			exp = maxExp + 1
   535			err = rangeError(fnParseFloat, s)
   536		}
   537	
   538		bits := mantissa & (1<<flt.mantbits - 1)
   539		bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   540		if neg {
   541			bits |= 1 << flt.mantbits << flt.expbits
   542		}
   543		if flt == &float32info {
   544			return float64(math.Float32frombits(uint32(bits))), err
   545		}
   546		return math.Float64frombits(bits), err
   547	}
   548	
   549	const fnParseFloat = "ParseFloat"
   550	
   551	func atof32(s string) (f float32, err error) {
   552		if val, ok := special(s); ok {
   553			return float32(val), nil
   554		}
   555	
   556		mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   557		if hex && ok {
   558			f, err := atofHex(s, &float32info, mantissa, exp, neg, trunc)
   559			return float32(f), err
   560		}
   561	
   562		if optimize && ok {
   563			// Try pure floating-point arithmetic conversion.
   564			if !trunc {
   565				if f, ok := atof32exact(mantissa, exp, neg); ok {
   566					return f, nil
   567				}
   568			}
   569			// Try another fast path.
   570			ext := new(extFloat)
   571			if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float32info); ok {
   572				b, ovf := ext.floatBits(&float32info)
   573				f = math.Float32frombits(uint32(b))
   574				if ovf {
   575					err = rangeError(fnParseFloat, s)
   576				}
   577				return f, err
   578			}
   579		}
   580	
   581		// Slow fallback.
   582		var d decimal
   583		if !d.set(s) {
   584			return 0, syntaxError(fnParseFloat, s)
   585		}
   586		b, ovf := d.floatBits(&float32info)
   587		f = math.Float32frombits(uint32(b))
   588		if ovf {
   589			err = rangeError(fnParseFloat, s)
   590		}
   591		return f, err
   592	}
   593	
   594	func atof64(s string) (f float64, err error) {
   595		if val, ok := special(s); ok {
   596			return val, nil
   597		}
   598	
   599		mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   600		if hex && ok {
   601			return atofHex(s, &float64info, mantissa, exp, neg, trunc)
   602		}
   603	
   604		if optimize && ok {
   605			// Try pure floating-point arithmetic conversion.
   606			if !trunc {
   607				if f, ok := atof64exact(mantissa, exp, neg); ok {
   608					return f, nil
   609				}
   610			}
   611			// Try another fast path.
   612			ext := new(extFloat)
   613			if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float64info); ok {
   614				b, ovf := ext.floatBits(&float64info)
   615				f = math.Float64frombits(b)
   616				if ovf {
   617					err = rangeError(fnParseFloat, s)
   618				}
   619				return f, err
   620			}
   621		}
   622	
   623		// Slow fallback.
   624		var d decimal
   625		if !d.set(s) {
   626			return 0, syntaxError(fnParseFloat, s)
   627		}
   628		b, ovf := d.floatBits(&float64info)
   629		f = math.Float64frombits(b)
   630		if ovf {
   631			err = rangeError(fnParseFloat, s)
   632		}
   633		return f, err
   634	}
   635	
   636	// ParseFloat converts the string s to a floating-point number
   637	// with the precision specified by bitSize: 32 for float32, or 64 for float64.
   638	// When bitSize=32, the result still has type float64, but it will be
   639	// convertible to float32 without changing its value.
   640	//
   641	// ParseFloat accepts decimal and hexadecimal floating-point number syntax.
   642	// If s is well-formed and near a valid floating-point number,
   643	// ParseFloat returns the nearest floating-point number rounded
   644	// using IEEE754 unbiased rounding.
   645	// (Parsing a hexadecimal floating-point value only rounds when
   646	// there are more bits in the hexadecimal representation than
   647	// will fit in the mantissa.)
   648	//
   649	// The errors that ParseFloat returns have concrete type *NumError
   650	// and include err.Num = s.
   651	//
   652	// If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   653	//
   654	// If s is syntactically well-formed but is more than 1/2 ULP
   655	// away from the largest floating point number of the given size,
   656	// ParseFloat returns f = ±Inf, err.Err = ErrRange.
   657	//
   658	// ParseFloat recognizes the strings "NaN", "+Inf", and "-Inf" as their
   659	// respective special floating point values. It ignores case when matching.
   660	func ParseFloat(s string, bitSize int) (float64, error) {
   661		if !underscoreOK(s) {
   662			return 0, syntaxError(fnParseFloat, s)
   663		}
   664		if bitSize == 32 {
   665			f, err := atof32(s)
   666			return float64(f), err
   667		}
   668		return atof64(s)
   669	}
   670	

View as plain text