...

Source file src/pkg/math/big/float.go

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file implements multi-precision floating-point numbers.
     6	// Like in the GNU MPFR library (https://www.mpfr.org/), operands
     7	// can be of mixed precision. Unlike MPFR, the rounding mode is
     8	// not specified with each operation, but with each operand. The
     9	// rounding mode of the result operand determines the rounding
    10	// mode of an operation. This is a from-scratch implementation.
    11	
    12	package big
    13	
    14	import (
    15		"fmt"
    16		"math"
    17		"math/bits"
    18	)
    19	
    20	const debugFloat = false // enable for debugging
    21	
    22	// A nonzero finite Float represents a multi-precision floating point number
    23	//
    24	//   sign × mantissa × 2**exponent
    25	//
    26	// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
    27	// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
    28	// All Floats are ordered, and the ordering of two Floats x and y
    29	// is defined by x.Cmp(y).
    30	//
    31	// Each Float value also has a precision, rounding mode, and accuracy.
    32	// The precision is the maximum number of mantissa bits available to
    33	// represent the value. The rounding mode specifies how a result should
    34	// be rounded to fit into the mantissa bits, and accuracy describes the
    35	// rounding error with respect to the exact result.
    36	//
    37	// Unless specified otherwise, all operations (including setters) that
    38	// specify a *Float variable for the result (usually via the receiver
    39	// with the exception of MantExp), round the numeric result according
    40	// to the precision and rounding mode of the result variable.
    41	//
    42	// If the provided result precision is 0 (see below), it is set to the
    43	// precision of the argument with the largest precision value before any
    44	// rounding takes place, and the rounding mode remains unchanged. Thus,
    45	// uninitialized Floats provided as result arguments will have their
    46	// precision set to a reasonable value determined by the operands, and
    47	// their mode is the zero value for RoundingMode (ToNearestEven).
    48	//
    49	// By setting the desired precision to 24 or 53 and using matching rounding
    50	// mode (typically ToNearestEven), Float operations produce the same results
    51	// as the corresponding float32 or float64 IEEE-754 arithmetic for operands
    52	// that correspond to normal (i.e., not denormal) float32 or float64 numbers.
    53	// Exponent underflow and overflow lead to a 0 or an Infinity for different
    54	// values than IEEE-754 because Float exponents have a much larger range.
    55	//
    56	// The zero (uninitialized) value for a Float is ready to use and represents
    57	// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
    58	//
    59	// Operations always take pointer arguments (*Float) rather
    60	// than Float values, and each unique Float value requires
    61	// its own unique *Float pointer. To "copy" a Float value,
    62	// an existing (or newly allocated) Float must be set to
    63	// a new value using the Float.Set method; shallow copies
    64	// of Floats are not supported and may lead to errors.
    65	type Float struct {
    66		prec uint32
    67		mode RoundingMode
    68		acc  Accuracy
    69		form form
    70		neg  bool
    71		mant nat
    72		exp  int32
    73	}
    74	
    75	// An ErrNaN panic is raised by a Float operation that would lead to
    76	// a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
    77	type ErrNaN struct {
    78		msg string
    79	}
    80	
    81	func (err ErrNaN) Error() string {
    82		return err.msg
    83	}
    84	
    85	// NewFloat allocates and returns a new Float set to x,
    86	// with precision 53 and rounding mode ToNearestEven.
    87	// NewFloat panics with ErrNaN if x is a NaN.
    88	func NewFloat(x float64) *Float {
    89		if math.IsNaN(x) {
    90			panic(ErrNaN{"NewFloat(NaN)"})
    91		}
    92		return new(Float).SetFloat64(x)
    93	}
    94	
    95	// Exponent and precision limits.
    96	const (
    97		MaxExp  = math.MaxInt32  // largest supported exponent
    98		MinExp  = math.MinInt32  // smallest supported exponent
    99		MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
   100	)
   101	
   102	// Internal representation: The mantissa bits x.mant of a nonzero finite
   103	// Float x are stored in a nat slice long enough to hold up to x.prec bits;
   104	// the slice may (but doesn't have to) be shorter if the mantissa contains
   105	// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
   106	// the msb is shifted all the way "to the left"). Thus, if the mantissa has
   107	// trailing 0 bits or x.prec is not a multiple of the Word size _W,
   108	// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
   109	// to the value 0.5; the exponent x.exp shifts the binary point as needed.
   110	//
   111	// A zero or non-finite Float x ignores x.mant and x.exp.
   112	//
   113	// x                 form      neg      mant         exp
   114	// ----------------------------------------------------------
   115	// ±0                zero      sign     -            -
   116	// 0 < |x| < +Inf    finite    sign     mantissa     exponent
   117	// ±Inf              inf       sign     -            -
   118	
   119	// A form value describes the internal representation.
   120	type form byte
   121	
   122	// The form value order is relevant - do not change!
   123	const (
   124		zero form = iota
   125		finite
   126		inf
   127	)
   128	
   129	// RoundingMode determines how a Float value is rounded to the
   130	// desired precision. Rounding may change the Float value; the
   131	// rounding error is described by the Float's Accuracy.
   132	type RoundingMode byte
   133	
   134	// These constants define supported rounding modes.
   135	const (
   136		ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
   137		ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
   138		ToZero                            // == IEEE 754-2008 roundTowardZero
   139		AwayFromZero                      // no IEEE 754-2008 equivalent
   140		ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
   141		ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
   142	)
   143	
   144	//go:generate stringer -type=RoundingMode
   145	
   146	// Accuracy describes the rounding error produced by the most recent
   147	// operation that generated a Float value, relative to the exact value.
   148	type Accuracy int8
   149	
   150	// Constants describing the Accuracy of a Float.
   151	const (
   152		Below Accuracy = -1
   153		Exact Accuracy = 0
   154		Above Accuracy = +1
   155	)
   156	
   157	//go:generate stringer -type=Accuracy
   158	
   159	// SetPrec sets z's precision to prec and returns the (possibly) rounded
   160	// value of z. Rounding occurs according to z's rounding mode if the mantissa
   161	// cannot be represented in prec bits without loss of precision.
   162	// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
   163	// If prec > MaxPrec, it is set to MaxPrec.
   164	func (z *Float) SetPrec(prec uint) *Float {
   165		z.acc = Exact // optimistically assume no rounding is needed
   166	
   167		// special case
   168		if prec == 0 {
   169			z.prec = 0
   170			if z.form == finite {
   171				// truncate z to 0
   172				z.acc = makeAcc(z.neg)
   173				z.form = zero
   174			}
   175			return z
   176		}
   177	
   178		// general case
   179		if prec > MaxPrec {
   180			prec = MaxPrec
   181		}
   182		old := z.prec
   183		z.prec = uint32(prec)
   184		if z.prec < old {
   185			z.round(0)
   186		}
   187		return z
   188	}
   189	
   190	func makeAcc(above bool) Accuracy {
   191		if above {
   192			return Above
   193		}
   194		return Below
   195	}
   196	
   197	// SetMode sets z's rounding mode to mode and returns an exact z.
   198	// z remains unchanged otherwise.
   199	// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
   200	func (z *Float) SetMode(mode RoundingMode) *Float {
   201		z.mode = mode
   202		z.acc = Exact
   203		return z
   204	}
   205	
   206	// Prec returns the mantissa precision of x in bits.
   207	// The result may be 0 for |x| == 0 and |x| == Inf.
   208	func (x *Float) Prec() uint {
   209		return uint(x.prec)
   210	}
   211	
   212	// MinPrec returns the minimum precision required to represent x exactly
   213	// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
   214	// The result is 0 for |x| == 0 and |x| == Inf.
   215	func (x *Float) MinPrec() uint {
   216		if x.form != finite {
   217			return 0
   218		}
   219		return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
   220	}
   221	
   222	// Mode returns the rounding mode of x.
   223	func (x *Float) Mode() RoundingMode {
   224		return x.mode
   225	}
   226	
   227	// Acc returns the accuracy of x produced by the most recent operation.
   228	func (x *Float) Acc() Accuracy {
   229		return x.acc
   230	}
   231	
   232	// Sign returns:
   233	//
   234	//	-1 if x <   0
   235	//	 0 if x is ±0
   236	//	+1 if x >   0
   237	//
   238	func (x *Float) Sign() int {
   239		if debugFloat {
   240			x.validate()
   241		}
   242		if x.form == zero {
   243			return 0
   244		}
   245		if x.neg {
   246			return -1
   247		}
   248		return 1
   249	}
   250	
   251	// MantExp breaks x into its mantissa and exponent components
   252	// and returns the exponent. If a non-nil mant argument is
   253	// provided its value is set to the mantissa of x, with the
   254	// same precision and rounding mode as x. The components
   255	// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
   256	// Calling MantExp with a nil argument is an efficient way to
   257	// get the exponent of the receiver.
   258	//
   259	// Special cases are:
   260	//
   261	//	(  ±0).MantExp(mant) = 0, with mant set to   ±0
   262	//	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
   263	//
   264	// x and mant may be the same in which case x is set to its
   265	// mantissa value.
   266	func (x *Float) MantExp(mant *Float) (exp int) {
   267		if debugFloat {
   268			x.validate()
   269		}
   270		if x.form == finite {
   271			exp = int(x.exp)
   272		}
   273		if mant != nil {
   274			mant.Copy(x)
   275			if mant.form == finite {
   276				mant.exp = 0
   277			}
   278		}
   279		return
   280	}
   281	
   282	func (z *Float) setExpAndRound(exp int64, sbit uint) {
   283		if exp < MinExp {
   284			// underflow
   285			z.acc = makeAcc(z.neg)
   286			z.form = zero
   287			return
   288		}
   289	
   290		if exp > MaxExp {
   291			// overflow
   292			z.acc = makeAcc(!z.neg)
   293			z.form = inf
   294			return
   295		}
   296	
   297		z.form = finite
   298		z.exp = int32(exp)
   299		z.round(sbit)
   300	}
   301	
   302	// SetMantExp sets z to mant × 2**exp and returns z.
   303	// The result z has the same precision and rounding mode
   304	// as mant. SetMantExp is an inverse of MantExp but does
   305	// not require 0.5 <= |mant| < 1.0. Specifically:
   306	//
   307	//	mant := new(Float)
   308	//	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
   309	//
   310	// Special cases are:
   311	//
   312	//	z.SetMantExp(  ±0, exp) =   ±0
   313	//	z.SetMantExp(±Inf, exp) = ±Inf
   314	//
   315	// z and mant may be the same in which case z's exponent
   316	// is set to exp.
   317	func (z *Float) SetMantExp(mant *Float, exp int) *Float {
   318		if debugFloat {
   319			z.validate()
   320			mant.validate()
   321		}
   322		z.Copy(mant)
   323		if z.form != finite {
   324			return z
   325		}
   326		z.setExpAndRound(int64(z.exp)+int64(exp), 0)
   327		return z
   328	}
   329	
   330	// Signbit reports whether x is negative or negative zero.
   331	func (x *Float) Signbit() bool {
   332		return x.neg
   333	}
   334	
   335	// IsInf reports whether x is +Inf or -Inf.
   336	func (x *Float) IsInf() bool {
   337		return x.form == inf
   338	}
   339	
   340	// IsInt reports whether x is an integer.
   341	// ±Inf values are not integers.
   342	func (x *Float) IsInt() bool {
   343		if debugFloat {
   344			x.validate()
   345		}
   346		// special cases
   347		if x.form != finite {
   348			return x.form == zero
   349		}
   350		// x.form == finite
   351		if x.exp <= 0 {
   352			return false
   353		}
   354		// x.exp > 0
   355		return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
   356	}
   357	
   358	// debugging support
   359	func (x *Float) validate() {
   360		if !debugFloat {
   361			// avoid performance bugs
   362			panic("validate called but debugFloat is not set")
   363		}
   364		if x.form != finite {
   365			return
   366		}
   367		m := len(x.mant)
   368		if m == 0 {
   369			panic("nonzero finite number with empty mantissa")
   370		}
   371		const msb = 1 << (_W - 1)
   372		if x.mant[m-1]&msb == 0 {
   373			panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
   374		}
   375		if x.prec == 0 {
   376			panic("zero precision finite number")
   377		}
   378	}
   379	
   380	// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
   381	// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
   382	// have before calling round. z's mantissa must be normalized (with the msb set)
   383	// or empty.
   384	//
   385	// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
   386	// sign of z. For correct rounding, the sign of z must be set correctly before
   387	// calling round.
   388	func (z *Float) round(sbit uint) {
   389		if debugFloat {
   390			z.validate()
   391		}
   392	
   393		z.acc = Exact
   394		if z.form != finite {
   395			// ±0 or ±Inf => nothing left to do
   396			return
   397		}
   398		// z.form == finite && len(z.mant) > 0
   399		// m > 0 implies z.prec > 0 (checked by validate)
   400	
   401		m := uint32(len(z.mant)) // present mantissa length in words
   402		bits := m * _W           // present mantissa bits; bits > 0
   403		if bits <= z.prec {
   404			// mantissa fits => nothing to do
   405			return
   406		}
   407		// bits > z.prec
   408	
   409		// Rounding is based on two bits: the rounding bit (rbit) and the
   410		// sticky bit (sbit). The rbit is the bit immediately before the
   411		// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
   412		// of the bits before the rbit are set (the "0.25", "0.125", etc.):
   413		//
   414		//   rbit  sbit  => "fractional part"
   415		//
   416		//   0     0        == 0
   417		//   0     1        >  0  , < 0.5
   418		//   1     0        == 0.5
   419		//   1     1        >  0.5, < 1.0
   420	
   421		// bits > z.prec: mantissa too large => round
   422		r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
   423		rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
   424		// The sticky bit is only needed for rounding ToNearestEven
   425		// or when the rounding bit is zero. Avoid computation otherwise.
   426		if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
   427			sbit = z.mant.sticky(r)
   428		}
   429		sbit &= 1 // be safe and ensure it's a single bit
   430	
   431		// cut off extra words
   432		n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
   433		if m > n {
   434			copy(z.mant, z.mant[m-n:]) // move n last words to front
   435			z.mant = z.mant[:n]
   436		}
   437	
   438		// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
   439		ntz := n*_W - z.prec // 0 <= ntz < _W
   440		lsb := Word(1) << ntz
   441	
   442		// round if result is inexact
   443		if rbit|sbit != 0 {
   444			// Make rounding decision: The result mantissa is truncated ("rounded down")
   445			// by default. Decide if we need to increment, or "round up", the (unsigned)
   446			// mantissa.
   447			inc := false
   448			switch z.mode {
   449			case ToNegativeInf:
   450				inc = z.neg
   451			case ToZero:
   452				// nothing to do
   453			case ToNearestEven:
   454				inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
   455			case ToNearestAway:
   456				inc = rbit != 0
   457			case AwayFromZero:
   458				inc = true
   459			case ToPositiveInf:
   460				inc = !z.neg
   461			default:
   462				panic("unreachable")
   463			}
   464	
   465			// A positive result (!z.neg) is Above the exact result if we increment,
   466			// and it's Below if we truncate (Exact results require no rounding).
   467			// For a negative result (z.neg) it is exactly the opposite.
   468			z.acc = makeAcc(inc != z.neg)
   469	
   470			if inc {
   471				// add 1 to mantissa
   472				if addVW(z.mant, z.mant, lsb) != 0 {
   473					// mantissa overflow => adjust exponent
   474					if z.exp >= MaxExp {
   475						// exponent overflow
   476						z.form = inf
   477						return
   478					}
   479					z.exp++
   480					// adjust mantissa: divide by 2 to compensate for exponent adjustment
   481					shrVU(z.mant, z.mant, 1)
   482					// set msb == carry == 1 from the mantissa overflow above
   483					const msb = 1 << (_W - 1)
   484					z.mant[n-1] |= msb
   485				}
   486			}
   487		}
   488	
   489		// zero out trailing bits in least-significant word
   490		z.mant[0] &^= lsb - 1
   491	
   492		if debugFloat {
   493			z.validate()
   494		}
   495	}
   496	
   497	func (z *Float) setBits64(neg bool, x uint64) *Float {
   498		if z.prec == 0 {
   499			z.prec = 64
   500		}
   501		z.acc = Exact
   502		z.neg = neg
   503		if x == 0 {
   504			z.form = zero
   505			return z
   506		}
   507		// x != 0
   508		z.form = finite
   509		s := bits.LeadingZeros64(x)
   510		z.mant = z.mant.setUint64(x << uint(s))
   511		z.exp = int32(64 - s) // always fits
   512		if z.prec < 64 {
   513			z.round(0)
   514		}
   515		return z
   516	}
   517	
   518	// SetUint64 sets z to the (possibly rounded) value of x and returns z.
   519	// If z's precision is 0, it is changed to 64 (and rounding will have
   520	// no effect).
   521	func (z *Float) SetUint64(x uint64) *Float {
   522		return z.setBits64(false, x)
   523	}
   524	
   525	// SetInt64 sets z to the (possibly rounded) value of x and returns z.
   526	// If z's precision is 0, it is changed to 64 (and rounding will have
   527	// no effect).
   528	func (z *Float) SetInt64(x int64) *Float {
   529		u := x
   530		if u < 0 {
   531			u = -u
   532		}
   533		// We cannot simply call z.SetUint64(uint64(u)) and change
   534		// the sign afterwards because the sign affects rounding.
   535		return z.setBits64(x < 0, uint64(u))
   536	}
   537	
   538	// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
   539	// If z's precision is 0, it is changed to 53 (and rounding will have
   540	// no effect). SetFloat64 panics with ErrNaN if x is a NaN.
   541	func (z *Float) SetFloat64(x float64) *Float {
   542		if z.prec == 0 {
   543			z.prec = 53
   544		}
   545		if math.IsNaN(x) {
   546			panic(ErrNaN{"Float.SetFloat64(NaN)"})
   547		}
   548		z.acc = Exact
   549		z.neg = math.Signbit(x) // handle -0, -Inf correctly
   550		if x == 0 {
   551			z.form = zero
   552			return z
   553		}
   554		if math.IsInf(x, 0) {
   555			z.form = inf
   556			return z
   557		}
   558		// normalized x != 0
   559		z.form = finite
   560		fmant, exp := math.Frexp(x) // get normalized mantissa
   561		z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
   562		z.exp = int32(exp) // always fits
   563		if z.prec < 53 {
   564			z.round(0)
   565		}
   566		return z
   567	}
   568	
   569	// fnorm normalizes mantissa m by shifting it to the left
   570	// such that the msb of the most-significant word (msw) is 1.
   571	// It returns the shift amount. It assumes that len(m) != 0.
   572	func fnorm(m nat) int64 {
   573		if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
   574			panic("msw of mantissa is 0")
   575		}
   576		s := nlz(m[len(m)-1])
   577		if s > 0 {
   578			c := shlVU(m, m, s)
   579			if debugFloat && c != 0 {
   580				panic("nlz or shlVU incorrect")
   581			}
   582		}
   583		return int64(s)
   584	}
   585	
   586	// SetInt sets z to the (possibly rounded) value of x and returns z.
   587	// If z's precision is 0, it is changed to the larger of x.BitLen()
   588	// or 64 (and rounding will have no effect).
   589	func (z *Float) SetInt(x *Int) *Float {
   590		// TODO(gri) can be more efficient if z.prec > 0
   591		// but small compared to the size of x, or if there
   592		// are many trailing 0's.
   593		bits := uint32(x.BitLen())
   594		if z.prec == 0 {
   595			z.prec = umax32(bits, 64)
   596		}
   597		z.acc = Exact
   598		z.neg = x.neg
   599		if len(x.abs) == 0 {
   600			z.form = zero
   601			return z
   602		}
   603		// x != 0
   604		z.mant = z.mant.set(x.abs)
   605		fnorm(z.mant)
   606		z.setExpAndRound(int64(bits), 0)
   607		return z
   608	}
   609	
   610	// SetRat sets z to the (possibly rounded) value of x and returns z.
   611	// If z's precision is 0, it is changed to the largest of a.BitLen(),
   612	// b.BitLen(), or 64; with x = a/b.
   613	func (z *Float) SetRat(x *Rat) *Float {
   614		if x.IsInt() {
   615			return z.SetInt(x.Num())
   616		}
   617		var a, b Float
   618		a.SetInt(x.Num())
   619		b.SetInt(x.Denom())
   620		if z.prec == 0 {
   621			z.prec = umax32(a.prec, b.prec)
   622		}
   623		return z.Quo(&a, &b)
   624	}
   625	
   626	// SetInf sets z to the infinite Float -Inf if signbit is
   627	// set, or +Inf if signbit is not set, and returns z. The
   628	// precision of z is unchanged and the result is always
   629	// Exact.
   630	func (z *Float) SetInf(signbit bool) *Float {
   631		z.acc = Exact
   632		z.form = inf
   633		z.neg = signbit
   634		return z
   635	}
   636	
   637	// Set sets z to the (possibly rounded) value of x and returns z.
   638	// If z's precision is 0, it is changed to the precision of x
   639	// before setting z (and rounding will have no effect).
   640	// Rounding is performed according to z's precision and rounding
   641	// mode; and z's accuracy reports the result error relative to the
   642	// exact (not rounded) result.
   643	func (z *Float) Set(x *Float) *Float {
   644		if debugFloat {
   645			x.validate()
   646		}
   647		z.acc = Exact
   648		if z != x {
   649			z.form = x.form
   650			z.neg = x.neg
   651			if x.form == finite {
   652				z.exp = x.exp
   653				z.mant = z.mant.set(x.mant)
   654			}
   655			if z.prec == 0 {
   656				z.prec = x.prec
   657			} else if z.prec < x.prec {
   658				z.round(0)
   659			}
   660		}
   661		return z
   662	}
   663	
   664	// Copy sets z to x, with the same precision, rounding mode, and
   665	// accuracy as x, and returns z. x is not changed even if z and
   666	// x are the same.
   667	func (z *Float) Copy(x *Float) *Float {
   668		if debugFloat {
   669			x.validate()
   670		}
   671		if z != x {
   672			z.prec = x.prec
   673			z.mode = x.mode
   674			z.acc = x.acc
   675			z.form = x.form
   676			z.neg = x.neg
   677			if z.form == finite {
   678				z.mant = z.mant.set(x.mant)
   679				z.exp = x.exp
   680			}
   681		}
   682		return z
   683	}
   684	
   685	// msb32 returns the 32 most significant bits of x.
   686	func msb32(x nat) uint32 {
   687		i := len(x) - 1
   688		if i < 0 {
   689			return 0
   690		}
   691		if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   692			panic("x not normalized")
   693		}
   694		switch _W {
   695		case 32:
   696			return uint32(x[i])
   697		case 64:
   698			return uint32(x[i] >> 32)
   699		}
   700		panic("unreachable")
   701	}
   702	
   703	// msb64 returns the 64 most significant bits of x.
   704	func msb64(x nat) uint64 {
   705		i := len(x) - 1
   706		if i < 0 {
   707			return 0
   708		}
   709		if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   710			panic("x not normalized")
   711		}
   712		switch _W {
   713		case 32:
   714			v := uint64(x[i]) << 32
   715			if i > 0 {
   716				v |= uint64(x[i-1])
   717			}
   718			return v
   719		case 64:
   720			return uint64(x[i])
   721		}
   722		panic("unreachable")
   723	}
   724	
   725	// Uint64 returns the unsigned integer resulting from truncating x
   726	// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
   727	// if x is an integer and Below otherwise.
   728	// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
   729	// for x > math.MaxUint64.
   730	func (x *Float) Uint64() (uint64, Accuracy) {
   731		if debugFloat {
   732			x.validate()
   733		}
   734	
   735		switch x.form {
   736		case finite:
   737			if x.neg {
   738				return 0, Above
   739			}
   740			// 0 < x < +Inf
   741			if x.exp <= 0 {
   742				// 0 < x < 1
   743				return 0, Below
   744			}
   745			// 1 <= x < Inf
   746			if x.exp <= 64 {
   747				// u = trunc(x) fits into a uint64
   748				u := msb64(x.mant) >> (64 - uint32(x.exp))
   749				if x.MinPrec() <= 64 {
   750					return u, Exact
   751				}
   752				return u, Below // x truncated
   753			}
   754			// x too large
   755			return math.MaxUint64, Below
   756	
   757		case zero:
   758			return 0, Exact
   759	
   760		case inf:
   761			if x.neg {
   762				return 0, Above
   763			}
   764			return math.MaxUint64, Below
   765		}
   766	
   767		panic("unreachable")
   768	}
   769	
   770	// Int64 returns the integer resulting from truncating x towards zero.
   771	// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
   772	// an integer, and Above (x < 0) or Below (x > 0) otherwise.
   773	// The result is (math.MinInt64, Above) for x < math.MinInt64,
   774	// and (math.MaxInt64, Below) for x > math.MaxInt64.
   775	func (x *Float) Int64() (int64, Accuracy) {
   776		if debugFloat {
   777			x.validate()
   778		}
   779	
   780		switch x.form {
   781		case finite:
   782			// 0 < |x| < +Inf
   783			acc := makeAcc(x.neg)
   784			if x.exp <= 0 {
   785				// 0 < |x| < 1
   786				return 0, acc
   787			}
   788			// x.exp > 0
   789	
   790			// 1 <= |x| < +Inf
   791			if x.exp <= 63 {
   792				// i = trunc(x) fits into an int64 (excluding math.MinInt64)
   793				i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
   794				if x.neg {
   795					i = -i
   796				}
   797				if x.MinPrec() <= uint(x.exp) {
   798					return i, Exact
   799				}
   800				return i, acc // x truncated
   801			}
   802			if x.neg {
   803				// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
   804				if x.exp == 64 && x.MinPrec() == 1 {
   805					acc = Exact
   806				}
   807				return math.MinInt64, acc
   808			}
   809			// x too large
   810			return math.MaxInt64, Below
   811	
   812		case zero:
   813			return 0, Exact
   814	
   815		case inf:
   816			if x.neg {
   817				return math.MinInt64, Above
   818			}
   819			return math.MaxInt64, Below
   820		}
   821	
   822		panic("unreachable")
   823	}
   824	
   825	// Float32 returns the float32 value nearest to x. If x is too small to be
   826	// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
   827	// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   828	// If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
   829	// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   830	func (x *Float) Float32() (float32, Accuracy) {
   831		if debugFloat {
   832			x.validate()
   833		}
   834	
   835		switch x.form {
   836		case finite:
   837			// 0 < |x| < +Inf
   838	
   839			const (
   840				fbits = 32                //        float size
   841				mbits = 23                //        mantissa size (excluding implicit msb)
   842				ebits = fbits - mbits - 1 //     8  exponent size
   843				bias  = 1<<(ebits-1) - 1  //   127  exponent bias
   844				dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
   845				emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
   846				emax  = bias              //   127  largest unbiased exponent (normal)
   847			)
   848	
   849			// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
   850			e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   851	
   852			// Compute precision p for float32 mantissa.
   853			// If the exponent is too small, we have a denormal number before
   854			// rounding and fewer than p mantissa bits of precision available
   855			// (the exponent remains fixed but the mantissa gets shifted right).
   856			p := mbits + 1 // precision of normal float
   857			if e < emin {
   858				// recompute precision
   859				p = mbits + 1 - emin + int(e)
   860				// If p == 0, the mantissa of x is shifted so much to the right
   861				// that its msb falls immediately to the right of the float32
   862				// mantissa space. In other words, if the smallest denormal is
   863				// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   864				// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   865				// If m == 0.5, it is rounded down to even, i.e., 0.0.
   866				// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   867				if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   868					// underflow to ±0
   869					if x.neg {
   870						var z float32
   871						return -z, Above
   872					}
   873					return 0.0, Below
   874				}
   875				// otherwise, round up
   876				// We handle p == 0 explicitly because it's easy and because
   877				// Float.round doesn't support rounding to 0 bits of precision.
   878				if p == 0 {
   879					if x.neg {
   880						return -math.SmallestNonzeroFloat32, Below
   881					}
   882					return math.SmallestNonzeroFloat32, Above
   883				}
   884			}
   885			// p > 0
   886	
   887			// round
   888			var r Float
   889			r.prec = uint32(p)
   890			r.Set(x)
   891			e = r.exp - 1
   892	
   893			// Rounding may have caused r to overflow to ±Inf
   894			// (rounding never causes underflows to 0).
   895			// If the exponent is too large, also overflow to ±Inf.
   896			if r.form == inf || e > emax {
   897				// overflow
   898				if x.neg {
   899					return float32(math.Inf(-1)), Below
   900				}
   901				return float32(math.Inf(+1)), Above
   902			}
   903			// e <= emax
   904	
   905			// Determine sign, biased exponent, and mantissa.
   906			var sign, bexp, mant uint32
   907			if x.neg {
   908				sign = 1 << (fbits - 1)
   909			}
   910	
   911			// Rounding may have caused a denormal number to
   912			// become normal. Check again.
   913			if e < emin {
   914				// denormal number: recompute precision
   915				// Since rounding may have at best increased precision
   916				// and we have eliminated p <= 0 early, we know p > 0.
   917				// bexp == 0 for denormals
   918				p = mbits + 1 - emin + int(e)
   919				mant = msb32(r.mant) >> uint(fbits-p)
   920			} else {
   921				// normal number: emin <= e <= emax
   922				bexp = uint32(e+bias) << mbits
   923				mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   924			}
   925	
   926			return math.Float32frombits(sign | bexp | mant), r.acc
   927	
   928		case zero:
   929			if x.neg {
   930				var z float32
   931				return -z, Exact
   932			}
   933			return 0.0, Exact
   934	
   935		case inf:
   936			if x.neg {
   937				return float32(math.Inf(-1)), Exact
   938			}
   939			return float32(math.Inf(+1)), Exact
   940		}
   941	
   942		panic("unreachable")
   943	}
   944	
   945	// Float64 returns the float64 value nearest to x. If x is too small to be
   946	// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
   947	// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   948	// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
   949	// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   950	func (x *Float) Float64() (float64, Accuracy) {
   951		if debugFloat {
   952			x.validate()
   953		}
   954	
   955		switch x.form {
   956		case finite:
   957			// 0 < |x| < +Inf
   958	
   959			const (
   960				fbits = 64                //        float size
   961				mbits = 52                //        mantissa size (excluding implicit msb)
   962				ebits = fbits - mbits - 1 //    11  exponent size
   963				bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
   964				dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
   965				emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
   966				emax  = bias              //  1023  largest unbiased exponent (normal)
   967			)
   968	
   969			// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
   970			e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   971	
   972			// Compute precision p for float64 mantissa.
   973			// If the exponent is too small, we have a denormal number before
   974			// rounding and fewer than p mantissa bits of precision available
   975			// (the exponent remains fixed but the mantissa gets shifted right).
   976			p := mbits + 1 // precision of normal float
   977			if e < emin {
   978				// recompute precision
   979				p = mbits + 1 - emin + int(e)
   980				// If p == 0, the mantissa of x is shifted so much to the right
   981				// that its msb falls immediately to the right of the float64
   982				// mantissa space. In other words, if the smallest denormal is
   983				// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   984				// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   985				// If m == 0.5, it is rounded down to even, i.e., 0.0.
   986				// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   987				if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   988					// underflow to ±0
   989					if x.neg {
   990						var z float64
   991						return -z, Above
   992					}
   993					return 0.0, Below
   994				}
   995				// otherwise, round up
   996				// We handle p == 0 explicitly because it's easy and because
   997				// Float.round doesn't support rounding to 0 bits of precision.
   998				if p == 0 {
   999					if x.neg {
  1000						return -math.SmallestNonzeroFloat64, Below
  1001					}
  1002					return math.SmallestNonzeroFloat64, Above
  1003				}
  1004			}
  1005			// p > 0
  1006	
  1007			// round
  1008			var r Float
  1009			r.prec = uint32(p)
  1010			r.Set(x)
  1011			e = r.exp - 1
  1012	
  1013			// Rounding may have caused r to overflow to ±Inf
  1014			// (rounding never causes underflows to 0).
  1015			// If the exponent is too large, also overflow to ±Inf.
  1016			if r.form == inf || e > emax {
  1017				// overflow
  1018				if x.neg {
  1019					return math.Inf(-1), Below
  1020				}
  1021				return math.Inf(+1), Above
  1022			}
  1023			// e <= emax
  1024	
  1025			// Determine sign, biased exponent, and mantissa.
  1026			var sign, bexp, mant uint64
  1027			if x.neg {
  1028				sign = 1 << (fbits - 1)
  1029			}
  1030	
  1031			// Rounding may have caused a denormal number to
  1032			// become normal. Check again.
  1033			if e < emin {
  1034				// denormal number: recompute precision
  1035				// Since rounding may have at best increased precision
  1036				// and we have eliminated p <= 0 early, we know p > 0.
  1037				// bexp == 0 for denormals
  1038				p = mbits + 1 - emin + int(e)
  1039				mant = msb64(r.mant) >> uint(fbits-p)
  1040			} else {
  1041				// normal number: emin <= e <= emax
  1042				bexp = uint64(e+bias) << mbits
  1043				mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  1044			}
  1045	
  1046			return math.Float64frombits(sign | bexp | mant), r.acc
  1047	
  1048		case zero:
  1049			if x.neg {
  1050				var z float64
  1051				return -z, Exact
  1052			}
  1053			return 0.0, Exact
  1054	
  1055		case inf:
  1056			if x.neg {
  1057				return math.Inf(-1), Exact
  1058			}
  1059			return math.Inf(+1), Exact
  1060		}
  1061	
  1062		panic("unreachable")
  1063	}
  1064	
  1065	// Int returns the result of truncating x towards zero;
  1066	// or nil if x is an infinity.
  1067	// The result is Exact if x.IsInt(); otherwise it is Below
  1068	// for x > 0, and Above for x < 0.
  1069	// If a non-nil *Int argument z is provided, Int stores
  1070	// the result in z instead of allocating a new Int.
  1071	func (x *Float) Int(z *Int) (*Int, Accuracy) {
  1072		if debugFloat {
  1073			x.validate()
  1074		}
  1075	
  1076		if z == nil && x.form <= finite {
  1077			z = new(Int)
  1078		}
  1079	
  1080		switch x.form {
  1081		case finite:
  1082			// 0 < |x| < +Inf
  1083			acc := makeAcc(x.neg)
  1084			if x.exp <= 0 {
  1085				// 0 < |x| < 1
  1086				return z.SetInt64(0), acc
  1087			}
  1088			// x.exp > 0
  1089	
  1090			// 1 <= |x| < +Inf
  1091			// determine minimum required precision for x
  1092			allBits := uint(len(x.mant)) * _W
  1093			exp := uint(x.exp)
  1094			if x.MinPrec() <= exp {
  1095				acc = Exact
  1096			}
  1097			// shift mantissa as needed
  1098			if z == nil {
  1099				z = new(Int)
  1100			}
  1101			z.neg = x.neg
  1102			switch {
  1103			case exp > allBits:
  1104				z.abs = z.abs.shl(x.mant, exp-allBits)
  1105			default:
  1106				z.abs = z.abs.set(x.mant)
  1107			case exp < allBits:
  1108				z.abs = z.abs.shr(x.mant, allBits-exp)
  1109			}
  1110			return z, acc
  1111	
  1112		case zero:
  1113			return z.SetInt64(0), Exact
  1114	
  1115		case inf:
  1116			return nil, makeAcc(x.neg)
  1117		}
  1118	
  1119		panic("unreachable")
  1120	}
  1121	
  1122	// Rat returns the rational number corresponding to x;
  1123	// or nil if x is an infinity.
  1124	// The result is Exact if x is not an Inf.
  1125	// If a non-nil *Rat argument z is provided, Rat stores
  1126	// the result in z instead of allocating a new Rat.
  1127	func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
  1128		if debugFloat {
  1129			x.validate()
  1130		}
  1131	
  1132		if z == nil && x.form <= finite {
  1133			z = new(Rat)
  1134		}
  1135	
  1136		switch x.form {
  1137		case finite:
  1138			// 0 < |x| < +Inf
  1139			allBits := int32(len(x.mant)) * _W
  1140			// build up numerator and denominator
  1141			z.a.neg = x.neg
  1142			switch {
  1143			case x.exp > allBits:
  1144				z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
  1145				z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1146				// z already in normal form
  1147			default:
  1148				z.a.abs = z.a.abs.set(x.mant)
  1149				z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1150				// z already in normal form
  1151			case x.exp < allBits:
  1152				z.a.abs = z.a.abs.set(x.mant)
  1153				t := z.b.abs.setUint64(1)
  1154				z.b.abs = t.shl(t, uint(allBits-x.exp))
  1155				z.norm()
  1156			}
  1157			return z, Exact
  1158	
  1159		case zero:
  1160			return z.SetInt64(0), Exact
  1161	
  1162		case inf:
  1163			return nil, makeAcc(x.neg)
  1164		}
  1165	
  1166		panic("unreachable")
  1167	}
  1168	
  1169	// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
  1170	// and returns z.
  1171	func (z *Float) Abs(x *Float) *Float {
  1172		z.Set(x)
  1173		z.neg = false
  1174		return z
  1175	}
  1176	
  1177	// Neg sets z to the (possibly rounded) value of x with its sign negated,
  1178	// and returns z.
  1179	func (z *Float) Neg(x *Float) *Float {
  1180		z.Set(x)
  1181		z.neg = !z.neg
  1182		return z
  1183	}
  1184	
  1185	func validateBinaryOperands(x, y *Float) {
  1186		if !debugFloat {
  1187			// avoid performance bugs
  1188			panic("validateBinaryOperands called but debugFloat is not set")
  1189		}
  1190		if len(x.mant) == 0 {
  1191			panic("empty mantissa for x")
  1192		}
  1193		if len(y.mant) == 0 {
  1194			panic("empty mantissa for y")
  1195		}
  1196	}
  1197	
  1198	// z = x + y, ignoring signs of x and y for the addition
  1199	// but using the sign of z for rounding the result.
  1200	// x and y must have a non-empty mantissa and valid exponent.
  1201	func (z *Float) uadd(x, y *Float) {
  1202		// Note: This implementation requires 2 shifts most of the
  1203		// time. It is also inefficient if exponents or precisions
  1204		// differ by wide margins. The following article describes
  1205		// an efficient (but much more complicated) implementation
  1206		// compatible with the internal representation used here:
  1207		//
  1208		// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
  1209		// Point Addition With Exact Rounding (as in the MPFR Library)"
  1210		// http://www.vinc17.net/research/papers/rnc6.pdf
  1211	
  1212		if debugFloat {
  1213			validateBinaryOperands(x, y)
  1214		}
  1215	
  1216		// compute exponents ex, ey for mantissa with "binary point"
  1217		// on the right (mantissa.0) - use int64 to avoid overflow
  1218		ex := int64(x.exp) - int64(len(x.mant))*_W
  1219		ey := int64(y.exp) - int64(len(y.mant))*_W
  1220	
  1221		al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1222	
  1223		// TODO(gri) having a combined add-and-shift primitive
  1224		//           could make this code significantly faster
  1225		switch {
  1226		case ex < ey:
  1227			if al {
  1228				t := nat(nil).shl(y.mant, uint(ey-ex))
  1229				z.mant = z.mant.add(x.mant, t)
  1230			} else {
  1231				z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1232				z.mant = z.mant.add(x.mant, z.mant)
  1233			}
  1234		default:
  1235			// ex == ey, no shift needed
  1236			z.mant = z.mant.add(x.mant, y.mant)
  1237		case ex > ey:
  1238			if al {
  1239				t := nat(nil).shl(x.mant, uint(ex-ey))
  1240				z.mant = z.mant.add(t, y.mant)
  1241			} else {
  1242				z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1243				z.mant = z.mant.add(z.mant, y.mant)
  1244			}
  1245			ex = ey
  1246		}
  1247		// len(z.mant) > 0
  1248	
  1249		z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1250	}
  1251	
  1252	// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
  1253	// but using the sign of z for rounding the result.
  1254	// x and y must have a non-empty mantissa and valid exponent.
  1255	func (z *Float) usub(x, y *Float) {
  1256		// This code is symmetric to uadd.
  1257		// We have not factored the common code out because
  1258		// eventually uadd (and usub) should be optimized
  1259		// by special-casing, and the code will diverge.
  1260	
  1261		if debugFloat {
  1262			validateBinaryOperands(x, y)
  1263		}
  1264	
  1265		ex := int64(x.exp) - int64(len(x.mant))*_W
  1266		ey := int64(y.exp) - int64(len(y.mant))*_W
  1267	
  1268		al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1269	
  1270		switch {
  1271		case ex < ey:
  1272			if al {
  1273				t := nat(nil).shl(y.mant, uint(ey-ex))
  1274				z.mant = t.sub(x.mant, t)
  1275			} else {
  1276				z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1277				z.mant = z.mant.sub(x.mant, z.mant)
  1278			}
  1279		default:
  1280			// ex == ey, no shift needed
  1281			z.mant = z.mant.sub(x.mant, y.mant)
  1282		case ex > ey:
  1283			if al {
  1284				t := nat(nil).shl(x.mant, uint(ex-ey))
  1285				z.mant = t.sub(t, y.mant)
  1286			} else {
  1287				z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1288				z.mant = z.mant.sub(z.mant, y.mant)
  1289			}
  1290			ex = ey
  1291		}
  1292	
  1293		// operands may have canceled each other out
  1294		if len(z.mant) == 0 {
  1295			z.acc = Exact
  1296			z.form = zero
  1297			z.neg = false
  1298			return
  1299		}
  1300		// len(z.mant) > 0
  1301	
  1302		z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1303	}
  1304	
  1305	// z = x * y, ignoring signs of x and y for the multiplication
  1306	// but using the sign of z for rounding the result.
  1307	// x and y must have a non-empty mantissa and valid exponent.
  1308	func (z *Float) umul(x, y *Float) {
  1309		if debugFloat {
  1310			validateBinaryOperands(x, y)
  1311		}
  1312	
  1313		// Note: This is doing too much work if the precision
  1314		// of z is less than the sum of the precisions of x
  1315		// and y which is often the case (e.g., if all floats
  1316		// have the same precision).
  1317		// TODO(gri) Optimize this for the common case.
  1318	
  1319		e := int64(x.exp) + int64(y.exp)
  1320		if x == y {
  1321			z.mant = z.mant.sqr(x.mant)
  1322		} else {
  1323			z.mant = z.mant.mul(x.mant, y.mant)
  1324		}
  1325		z.setExpAndRound(e-fnorm(z.mant), 0)
  1326	}
  1327	
  1328	// z = x / y, ignoring signs of x and y for the division
  1329	// but using the sign of z for rounding the result.
  1330	// x and y must have a non-empty mantissa and valid exponent.
  1331	func (z *Float) uquo(x, y *Float) {
  1332		if debugFloat {
  1333			validateBinaryOperands(x, y)
  1334		}
  1335	
  1336		// mantissa length in words for desired result precision + 1
  1337		// (at least one extra bit so we get the rounding bit after
  1338		// the division)
  1339		n := int(z.prec/_W) + 1
  1340	
  1341		// compute adjusted x.mant such that we get enough result precision
  1342		xadj := x.mant
  1343		if d := n - len(x.mant) + len(y.mant); d > 0 {
  1344			// d extra words needed => add d "0 digits" to x
  1345			xadj = make(nat, len(x.mant)+d)
  1346			copy(xadj[d:], x.mant)
  1347		}
  1348		// TODO(gri): If we have too many digits (d < 0), we should be able
  1349		// to shorten x for faster division. But we must be extra careful
  1350		// with rounding in that case.
  1351	
  1352		// Compute d before division since there may be aliasing of x.mant
  1353		// (via xadj) or y.mant with z.mant.
  1354		d := len(xadj) - len(y.mant)
  1355	
  1356		// divide
  1357		var r nat
  1358		z.mant, r = z.mant.div(nil, xadj, y.mant)
  1359		e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
  1360	
  1361		// The result is long enough to include (at least) the rounding bit.
  1362		// If there's a non-zero remainder, the corresponding fractional part
  1363		// (if it were computed), would have a non-zero sticky bit (if it were
  1364		// zero, it couldn't have a non-zero remainder).
  1365		var sbit uint
  1366		if len(r) > 0 {
  1367			sbit = 1
  1368		}
  1369	
  1370		z.setExpAndRound(e-fnorm(z.mant), sbit)
  1371	}
  1372	
  1373	// ucmp returns -1, 0, or +1, depending on whether
  1374	// |x| < |y|, |x| == |y|, or |x| > |y|.
  1375	// x and y must have a non-empty mantissa and valid exponent.
  1376	func (x *Float) ucmp(y *Float) int {
  1377		if debugFloat {
  1378			validateBinaryOperands(x, y)
  1379		}
  1380	
  1381		switch {
  1382		case x.exp < y.exp:
  1383			return -1
  1384		case x.exp > y.exp:
  1385			return +1
  1386		}
  1387		// x.exp == y.exp
  1388	
  1389		// compare mantissas
  1390		i := len(x.mant)
  1391		j := len(y.mant)
  1392		for i > 0 || j > 0 {
  1393			var xm, ym Word
  1394			if i > 0 {
  1395				i--
  1396				xm = x.mant[i]
  1397			}
  1398			if j > 0 {
  1399				j--
  1400				ym = y.mant[j]
  1401			}
  1402			switch {
  1403			case xm < ym:
  1404				return -1
  1405			case xm > ym:
  1406				return +1
  1407			}
  1408		}
  1409	
  1410		return 0
  1411	}
  1412	
  1413	// Handling of sign bit as defined by IEEE 754-2008, section 6.3:
  1414	//
  1415	// When neither the inputs nor result are NaN, the sign of a product or
  1416	// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
  1417	// or of a difference x−y regarded as a sum x+(−y), differs from at most
  1418	// one of the addends’ signs; and the sign of the result of conversions,
  1419	// the quantize operation, the roundToIntegral operations, and the
  1420	// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
  1421	// These rules shall apply even when operands or results are zero or infinite.
  1422	//
  1423	// When the sum of two operands with opposite signs (or the difference of
  1424	// two operands with like signs) is exactly zero, the sign of that sum (or
  1425	// difference) shall be +0 in all rounding-direction attributes except
  1426	// roundTowardNegative; under that attribute, the sign of an exact zero
  1427	// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
  1428	// sign as x even when x is zero.
  1429	//
  1430	// See also: https://play.golang.org/p/RtH3UCt5IH
  1431	
  1432	// Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
  1433	// it is changed to the larger of x's or y's precision before the operation.
  1434	// Rounding is performed according to z's precision and rounding mode; and
  1435	// z's accuracy reports the result error relative to the exact (not rounded)
  1436	// result. Add panics with ErrNaN if x and y are infinities with opposite
  1437	// signs. The value of z is undefined in that case.
  1438	func (z *Float) Add(x, y *Float) *Float {
  1439		if debugFloat {
  1440			x.validate()
  1441			y.validate()
  1442		}
  1443	
  1444		if z.prec == 0 {
  1445			z.prec = umax32(x.prec, y.prec)
  1446		}
  1447	
  1448		if x.form == finite && y.form == finite {
  1449			// x + y (common case)
  1450	
  1451			// Below we set z.neg = x.neg, and when z aliases y this will
  1452			// change the y operand's sign. This is fine, because if an
  1453			// operand aliases the receiver it'll be overwritten, but we still
  1454			// want the original x.neg and y.neg values when we evaluate
  1455			// x.neg != y.neg, so we need to save y.neg before setting z.neg.
  1456			yneg := y.neg
  1457	
  1458			z.neg = x.neg
  1459			if x.neg == yneg {
  1460				// x + y == x + y
  1461				// (-x) + (-y) == -(x + y)
  1462				z.uadd(x, y)
  1463			} else {
  1464				// x + (-y) == x - y == -(y - x)
  1465				// (-x) + y == y - x == -(x - y)
  1466				if x.ucmp(y) > 0 {
  1467					z.usub(x, y)
  1468				} else {
  1469					z.neg = !z.neg
  1470					z.usub(y, x)
  1471				}
  1472			}
  1473			if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
  1474				z.neg = true
  1475			}
  1476			return z
  1477		}
  1478	
  1479		if x.form == inf && y.form == inf && x.neg != y.neg {
  1480			// +Inf + -Inf
  1481			// -Inf + +Inf
  1482			// value of z is undefined but make sure it's valid
  1483			z.acc = Exact
  1484			z.form = zero
  1485			z.neg = false
  1486			panic(ErrNaN{"addition of infinities with opposite signs"})
  1487		}
  1488	
  1489		if x.form == zero && y.form == zero {
  1490			// ±0 + ±0
  1491			z.acc = Exact
  1492			z.form = zero
  1493			z.neg = x.neg && y.neg // -0 + -0 == -0
  1494			return z
  1495		}
  1496	
  1497		if x.form == inf || y.form == zero {
  1498			// ±Inf + y
  1499			// x + ±0
  1500			return z.Set(x)
  1501		}
  1502	
  1503		// ±0 + y
  1504		// x + ±Inf
  1505		return z.Set(y)
  1506	}
  1507	
  1508	// Sub sets z to the rounded difference x-y and returns z.
  1509	// Precision, rounding, and accuracy reporting are as for Add.
  1510	// Sub panics with ErrNaN if x and y are infinities with equal
  1511	// signs. The value of z is undefined in that case.
  1512	func (z *Float) Sub(x, y *Float) *Float {
  1513		if debugFloat {
  1514			x.validate()
  1515			y.validate()
  1516		}
  1517	
  1518		if z.prec == 0 {
  1519			z.prec = umax32(x.prec, y.prec)
  1520		}
  1521	
  1522		if x.form == finite && y.form == finite {
  1523			// x - y (common case)
  1524			yneg := y.neg
  1525			z.neg = x.neg
  1526			if x.neg != yneg {
  1527				// x - (-y) == x + y
  1528				// (-x) - y == -(x + y)
  1529				z.uadd(x, y)
  1530			} else {
  1531				// x - y == x - y == -(y - x)
  1532				// (-x) - (-y) == y - x == -(x - y)
  1533				if x.ucmp(y) > 0 {
  1534					z.usub(x, y)
  1535				} else {
  1536					z.neg = !z.neg
  1537					z.usub(y, x)
  1538				}
  1539			}
  1540			if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
  1541				z.neg = true
  1542			}
  1543			return z
  1544		}
  1545	
  1546		if x.form == inf && y.form == inf && x.neg == y.neg {
  1547			// +Inf - +Inf
  1548			// -Inf - -Inf
  1549			// value of z is undefined but make sure it's valid
  1550			z.acc = Exact
  1551			z.form = zero
  1552			z.neg = false
  1553			panic(ErrNaN{"subtraction of infinities with equal signs"})
  1554		}
  1555	
  1556		if x.form == zero && y.form == zero {
  1557			// ±0 - ±0
  1558			z.acc = Exact
  1559			z.form = zero
  1560			z.neg = x.neg && !y.neg // -0 - +0 == -0
  1561			return z
  1562		}
  1563	
  1564		if x.form == inf || y.form == zero {
  1565			// ±Inf - y
  1566			// x - ±0
  1567			return z.Set(x)
  1568		}
  1569	
  1570		// ±0 - y
  1571		// x - ±Inf
  1572		return z.Neg(y)
  1573	}
  1574	
  1575	// Mul sets z to the rounded product x*y and returns z.
  1576	// Precision, rounding, and accuracy reporting are as for Add.
  1577	// Mul panics with ErrNaN if one operand is zero and the other
  1578	// operand an infinity. The value of z is undefined in that case.
  1579	func (z *Float) Mul(x, y *Float) *Float {
  1580		if debugFloat {
  1581			x.validate()
  1582			y.validate()
  1583		}
  1584	
  1585		if z.prec == 0 {
  1586			z.prec = umax32(x.prec, y.prec)
  1587		}
  1588	
  1589		z.neg = x.neg != y.neg
  1590	
  1591		if x.form == finite && y.form == finite {
  1592			// x * y (common case)
  1593			z.umul(x, y)
  1594			return z
  1595		}
  1596	
  1597		z.acc = Exact
  1598		if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
  1599			// ±0 * ±Inf
  1600			// ±Inf * ±0
  1601			// value of z is undefined but make sure it's valid
  1602			z.form = zero
  1603			z.neg = false
  1604			panic(ErrNaN{"multiplication of zero with infinity"})
  1605		}
  1606	
  1607		if x.form == inf || y.form == inf {
  1608			// ±Inf * y
  1609			// x * ±Inf
  1610			z.form = inf
  1611			return z
  1612		}
  1613	
  1614		// ±0 * y
  1615		// x * ±0
  1616		z.form = zero
  1617		return z
  1618	}
  1619	
  1620	// Quo sets z to the rounded quotient x/y and returns z.
  1621	// Precision, rounding, and accuracy reporting are as for Add.
  1622	// Quo panics with ErrNaN if both operands are zero or infinities.
  1623	// The value of z is undefined in that case.
  1624	func (z *Float) Quo(x, y *Float) *Float {
  1625		if debugFloat {
  1626			x.validate()
  1627			y.validate()
  1628		}
  1629	
  1630		if z.prec == 0 {
  1631			z.prec = umax32(x.prec, y.prec)
  1632		}
  1633	
  1634		z.neg = x.neg != y.neg
  1635	
  1636		if x.form == finite && y.form == finite {
  1637			// x / y (common case)
  1638			z.uquo(x, y)
  1639			return z
  1640		}
  1641	
  1642		z.acc = Exact
  1643		if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
  1644			// ±0 / ±0
  1645			// ±Inf / ±Inf
  1646			// value of z is undefined but make sure it's valid
  1647			z.form = zero
  1648			z.neg = false
  1649			panic(ErrNaN{"division of zero by zero or infinity by infinity"})
  1650		}
  1651	
  1652		if x.form == zero || y.form == inf {
  1653			// ±0 / y
  1654			// x / ±Inf
  1655			z.form = zero
  1656			return z
  1657		}
  1658	
  1659		// x / ±0
  1660		// ±Inf / y
  1661		z.form = inf
  1662		return z
  1663	}
  1664	
  1665	// Cmp compares x and y and returns:
  1666	//
  1667	//   -1 if x <  y
  1668	//    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
  1669	//   +1 if x >  y
  1670	//
  1671	func (x *Float) Cmp(y *Float) int {
  1672		if debugFloat {
  1673			x.validate()
  1674			y.validate()
  1675		}
  1676	
  1677		mx := x.ord()
  1678		my := y.ord()
  1679		switch {
  1680		case mx < my:
  1681			return -1
  1682		case mx > my:
  1683			return +1
  1684		}
  1685		// mx == my
  1686	
  1687		// only if |mx| == 1 we have to compare the mantissae
  1688		switch mx {
  1689		case -1:
  1690			return y.ucmp(x)
  1691		case +1:
  1692			return x.ucmp(y)
  1693		}
  1694	
  1695		return 0
  1696	}
  1697	
  1698	// ord classifies x and returns:
  1699	//
  1700	//	-2 if -Inf == x
  1701	//	-1 if -Inf < x < 0
  1702	//	 0 if x == 0 (signed or unsigned)
  1703	//	+1 if 0 < x < +Inf
  1704	//	+2 if x == +Inf
  1705	//
  1706	func (x *Float) ord() int {
  1707		var m int
  1708		switch x.form {
  1709		case finite:
  1710			m = 1
  1711		case zero:
  1712			return 0
  1713		case inf:
  1714			m = 2
  1715		}
  1716		if x.neg {
  1717			m = -m
  1718		}
  1719		return m
  1720	}
  1721	
  1722	func umax32(x, y uint32) uint32 {
  1723		if x > y {
  1724			return x
  1725		}
  1726		return y
  1727	}
  1728	

View as plain text