Source file src/fmt/scan.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package fmt
     6	
     7	import (
     8		"errors"
     9		"io"
    10		"math"
    11		"os"
    12		"reflect"
    13		"strconv"
    14		"sync"
    15		"unicode/utf8"
    16	)
    17	
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c).  If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns. Callers that need
	// to keep the token must copy it.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points. When ok is false, wid carries no
	// meaning.
	Width() (wid int, ok bool)
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	// Read is present only so that a ScanState also satisfies io.Reader.
	Read(buf []byte) (n int, err error)
}
    50	
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
//
// The state argument gives access to the input being scanned, and verb is
// the format verb in effect for this argument.
type Scanner interface {
	Scan(state ScanState, verb rune) error
}
    58	
    59	// Scan scans text read from standard input, storing successive
    60	// space-separated values into successive arguments. Newlines count
    61	// as space. It returns the number of items successfully scanned.
    62	// If that is less than the number of arguments, err will report why.
    63	func Scan(a ...interface{}) (n int, err error) {
    64		return Fscan(os.Stdin, a...)
    65	}
    66	
    67	// Scanln is similar to Scan, but stops scanning at a newline and
    68	// after the final item there must be a newline or EOF.
    69	func Scanln(a ...interface{}) (n int, err error) {
    70		return Fscanln(os.Stdin, a...)
    71	}
    72	
    73	// Scanf scans text read from standard input, storing successive
    74	// space-separated values into successive arguments as determined by
    75	// the format. It returns the number of items successfully scanned.
    76	// If that is less than the number of arguments, err will report why.
    77	// Newlines in the input must match newlines in the format.
    78	// The one exception: the verb %c always scans the next rune in the
    79	// input, even if it is a space (or tab etc.) or newline.
    80	func Scanf(format string, a ...interface{}) (n int, err error) {
    81		return Fscanf(os.Stdin, format, a...)
    82	}
    83	
    84	type stringReader string
    85	
    86	func (r *stringReader) Read(b []byte) (n int, err error) {
    87		n = copy(b, *r)
    88		*r = (*r)[n:]
    89		if n == 0 {
    90			err = io.EOF
    91		}
    92		return
    93	}
    94	
    95	// Sscan scans the argument string, storing successive space-separated
    96	// values into successive arguments. Newlines count as space. It
    97	// returns the number of items successfully scanned. If that is less
    98	// than the number of arguments, err will report why.
    99	func Sscan(str string, a ...interface{}) (n int, err error) {
   100		return Fscan((*stringReader)(&str), a...)
   101	}
   102	
   103	// Sscanln is similar to Sscan, but stops scanning at a newline and
   104	// after the final item there must be a newline or EOF.
   105	func Sscanln(str string, a ...interface{}) (n int, err error) {
   106		return Fscanln((*stringReader)(&str), a...)
   107	}
   108	
   109	// Sscanf scans the argument string, storing successive space-separated
   110	// values into successive arguments as determined by the format. It
   111	// returns the number of items successfully parsed.
   112	// Newlines in the input must match newlines in the format.
   113	func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
   114		return Fscanf((*stringReader)(&str), format, a...)
   115	}
   116	
   117	// Fscan scans text read from r, storing successive space-separated
   118	// values into successive arguments. Newlines count as space. It
   119	// returns the number of items successfully scanned. If that is less
   120	// than the number of arguments, err will report why.
   121	func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
   122		s, old := newScanState(r, true, false)
   123		n, err = s.doScan(a)
   124		s.free(old)
   125		return
   126	}
   127	
   128	// Fscanln is similar to Fscan, but stops scanning at a newline and
   129	// after the final item there must be a newline or EOF.
   130	func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
   131		s, old := newScanState(r, false, true)
   132		n, err = s.doScan(a)
   133		s.free(old)
   134		return
   135	}
   136	
   137	// Fscanf scans text read from r, storing successive space-separated
   138	// values into successive arguments as determined by the format. It
   139	// returns the number of items successfully parsed.
   140	// Newlines in the input must match newlines in the format.
   141	func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
   142		s, old := newScanState(r, false, false)
   143		n, err = s.doScanf(format, a)
   144		s.free(old)
   145		return
   146	}
   147	
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// the scanning code panics with a scanError value, and a recover site such
// as the one in (*ss).Token uses a type assertion on scanError to tell
// these panics apart from unrelated ones, which it re-panics.
type scanError struct {
	err error // the underlying error to report to the caller
}
   153	
// eof is the value getRune returns in place of a rune when the input
// is exhausted. It is negative, so it never equals a valid code point.
const eof = -1
   155	
// ss is the internal implementation of ScanState.
// Instances are recycled through the ssFree pool; newScanState
// reinitializes the fields before each use.
type ss struct {
	rs    io.RuneScanner // where to read input
	buf   buffer         // token accumulator
	count int            // runes consumed so far.
	atEOF bool           // already read EOF
	ssave                // per-scan settings; see ssave
}
   164	
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// It is embedded in ss, so its fields are accessed directly on an *ss.
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg; hugeWid means no width was set.
}
   175	
   176	// The Read method is only in ScanState so that ScanState
   177	// satisfies io.Reader. It will never be called when used as
   178	// intended, so there is no need to make it actually work.
   179	func (s *ss) Read(buf []byte) (n int, err error) {
   180		return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
   181	}
   182	
   183	func (s *ss) ReadRune() (r rune, size int, err error) {
   184		if s.atEOF || s.count >= s.argLimit {
   185			err = io.EOF
   186			return
   187		}
   188	
   189		r, size, err = s.rs.ReadRune()
   190		if err == nil {
   191			s.count++
   192			if s.nlIsEnd && r == '\n' {
   193				s.atEOF = true
   194			}
   195		} else if err == io.EOF {
   196			s.atEOF = true
   197		}
   198		return
   199	}
   200	
   201	func (s *ss) Width() (wid int, ok bool) {
   202		if s.maxWid == hugeWid {
   203			return 0, false
   204		}
   205		return s.maxWid, true
   206	}
   207	
   208	// The public method returns an error; this private one panics.
   209	// If getRune reaches EOF, the return value is EOF (-1).
   210	func (s *ss) getRune() (r rune) {
   211		r, _, err := s.ReadRune()
   212		if err != nil {
   213			if err == io.EOF {
   214				return eof
   215			}
   216			s.error(err)
   217		}
   218		return
   219	}
   220	
   221	// mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
   222	// It is called in cases such as string scanning where an EOF is a
   223	// syntax error.
   224	func (s *ss) mustReadRune() (r rune) {
   225		r = s.getRune()
   226		if r == eof {
   227			s.error(io.ErrUnexpectedEOF)
   228		}
   229		return
   230	}
   231	
   232	func (s *ss) UnreadRune() error {
   233		s.rs.UnreadRune()
   234		s.atEOF = false
   235		s.count--
   236		return nil
   237	}
   238	
   239	func (s *ss) error(err error) {
   240		panic(scanError{err})
   241	}
   242	
   243	func (s *ss) errorString(err string) {
   244		panic(scanError{errors.New(err)})
   245	}
   246	
   247	func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
   248		defer func() {
   249			if e := recover(); e != nil {
   250				if se, ok := e.(scanError); ok {
   251					err = se.err
   252				} else {
   253					panic(e)
   254				}
   255			}
   256		}()
   257		if f == nil {
   258			f = notSpace
   259		}
   260		s.buf = s.buf[:0]
   261		tok = s.token(skipSpace, f)
   262		return
   263	}
   264	
   265	// space is a copy of the unicode.White_Space ranges,
   266	// to avoid depending on package unicode.
   267	var space = [][2]uint16{
   268		{0x0009, 0x000d},
   269		{0x0020, 0x0020},
   270		{0x0085, 0x0085},
   271		{0x00a0, 0x00a0},
   272		{0x1680, 0x1680},
   273		{0x2000, 0x200a},
   274		{0x2028, 0x2029},
   275		{0x202f, 0x202f},
   276		{0x205f, 0x205f},
   277		{0x3000, 0x3000},
   278	}
   279	
   280	func isSpace(r rune) bool {
   281		if r >= 1<<16 {
   282			return false
   283		}
   284		rx := uint16(r)
   285		for _, rng := range space {
   286			if rx < rng[0] {
   287				return false
   288			}
   289			if rx <= rng[1] {
   290				return true
   291			}
   292		}
   293		return false
   294	}
   295	
   296	// notSpace is the default scanning function used in Token.
   297	func notSpace(r rune) bool {
   298		return !isSpace(r)
   299	}
   300	
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneScanner.
//
// peekRune doubles as the one-rune push-back slot: after a read it holds
// the bitwise complement of the rune just returned (a negative value),
// and UnreadRune flips it back to a non-negative value so the next
// ReadRune returns it again.
type readRune struct {
	reader   io.Reader
	buf      [utf8.UTFMax]byte // used only inside ReadRune
	pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf  [utf8.UTFMax]byte // bytes left over
	peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
}
   311	
   312	// readByte returns the next byte from the input, which may be
   313	// left over from a previous read if the UTF-8 was ill-formed.
   314	func (r *readRune) readByte() (b byte, err error) {
   315		if r.pending > 0 {
   316			b = r.pendBuf[0]
   317			copy(r.pendBuf[0:], r.pendBuf[1:])
   318			r.pending--
   319			return
   320		}
   321		n, err := io.ReadFull(r.reader, r.pendBuf[:1])
   322		if n != 1 {
   323			return 0, err
   324		}
   325		return r.pendBuf[0], err
   326	}
   327	
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r.
//
// If a rune was pushed back by UnreadRune (peekRune >= 0) it is returned
// again without touching the reader. Otherwise bytes are pulled one at a
// time through readByte until utf8.FullRune reports a complete encoding
// or the input ends. Bytes that were read but not consumed by the decoded
// rune are saved in pendBuf for later reads. An error on the very first
// byte, or a non-EOF error on a later byte, is returned as is.
func (r *readRune) ReadRune() (rr rune, size int, err error) {
	if r.peekRune >= 0 {
		// Replay the pushed-back rune and mark it as consumed again.
		rr = r.peekRune
		r.peekRune = ^r.peekRune
		size = utf8.RuneLen(rr)
		return
	}
	r.buf[0], err = r.readByte()
	if err != nil {
		return
	}
	if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
		rr = rune(r.buf[0])
		size = 1 // Known to be 1.
		// Flip the bits of the rune so it's available to UnreadRune.
		r.peekRune = ^rr
		return
	}
	var n int
	for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
		r.buf[n], err = r.readByte()
		if err != nil {
			if err == io.EOF {
				// Input ended mid-sequence: decode what we have and
				// do not report the EOF on this call.
				err = nil
				break
			}
			return
		}
	}
	rr, size = utf8.DecodeRune(r.buf[:n])
	if size < n { // an error, save the bytes for the next read
		copy(r.pendBuf[r.pending:], r.buf[size:n])
		r.pending += n - size
	}
	// Flip the bits of the rune so it's available to UnreadRune.
	r.peekRune = ^rr
	return
}
   368	
   369	func (r *readRune) UnreadRune() error {
   370		if r.peekRune >= 0 {
   371			return errors.New("fmt: scanning called UnreadRune with no rune available")
   372		}
   373		// Reverse bit flip of previously read rune to obtain valid >=0 state.
   374		r.peekRune = ^r.peekRune
   375		return nil
   376	}
   377	
// ssFree is a pool of reusable ss values. newScanState takes an ss from
// it and (*ss).free hands it back, so a scan does not have to allocate
// a fresh ss each time.
var ssFree = sync.Pool{
	New: func() interface{} { return new(ss) },
}
   381	
   382	// newScanState allocates a new ss struct or grab a cached one.
   383	func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
   384		s = ssFree.Get().(*ss)
   385		if rs, ok := r.(io.RuneScanner); ok {
   386			s.rs = rs
   387		} else {
   388			s.rs = &readRune{reader: r, peekRune: -1}
   389		}
   390		s.nlIsSpace = nlIsSpace
   391		s.nlIsEnd = nlIsEnd
   392		s.atEOF = false
   393		s.limit = hugeWid
   394		s.argLimit = hugeWid
   395		s.maxWid = hugeWid
   396		s.validSave = true
   397		s.count = 0
   398		return
   399	}
   400	
   401	// free saves used ss structs in ssFree; avoid an allocation per invocation.
   402	func (s *ss) free(old ssave) {
   403		// If it was used recursively, just restore the old state.
   404		if old.validSave {
   405			s.ssave = old
   406			return
   407		}
   408		// Don't hold on to ss structs with large buffers.
   409		if cap(s.buf) > 1024 {
   410			return
   411		}
   412		s.buf = s.buf[:0]
   413		s.rs = nil
   414		ssFree.Put(s)
   415	}
   416	
   417	// SkipSpace provides Scan methods the ability to skip space and newline
   418	// characters in keeping with the current scanning mode set by format strings
   419	// and Scan/Scanln.
   420	func (s *ss) SkipSpace() {
   421		for {
   422			r := s.getRune()
   423			if r == eof {
   424				return
   425			}
   426			if r == '\r' && s.peek("\n") {
   427				continue
   428			}
   429			if r == '\n' {
   430				if s.nlIsSpace {
   431					continue
   432				}
   433				s.errorString("unexpected newline")
   434				return
   435			}
   436			if !isSpace(r) {
   437				s.UnreadRune()
   438				break
   439			}
   440		}
   441	}
   442	
   443	// token returns the next space-delimited string from the input. It
   444	// skips white space. For Scanln, it stops at newlines. For Scan,
   445	// newlines are treated as spaces.
   446	func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
   447		if skipSpace {
   448			s.SkipSpace()
   449		}
   450		// read until white space or newline
   451		for {
   452			r := s.getRune()
   453			if r == eof {
   454				break
   455			}
   456			if !f(r) {
   457				s.UnreadRune()
   458				break
   459			}
   460			s.buf.writeRune(r)
   461		}
   462		return s.buf
   463	}
   464	
// complexError is raised by complexTokens when the input does not have
// the expected complex-number shape.
var complexError = errors.New("syntax error scanning complex number")

// boolError is raised by scanBool when a token starts like "true" or
// "false" but is not spelled out completely.
var boolError = errors.New("syntax error scanning boolean")
   467	
   468	func indexRune(s string, r rune) int {
   469		for i, c := range s {
   470			if c == r {
   471				return i
   472			}
   473		}
   474		return -1
   475	}
   476	
   477	// consume reads the next rune in the input and reports whether it is in the ok string.
   478	// If accept is true, it puts the character into the input token.
   479	func (s *ss) consume(ok string, accept bool) bool {
   480		r := s.getRune()
   481		if r == eof {
   482			return false
   483		}
   484		if indexRune(ok, r) >= 0 {
   485			if accept {
   486				s.buf.writeRune(r)
   487			}
   488			return true
   489		}
   490		if r != eof && accept {
   491			s.UnreadRune()
   492		}
   493		return false
   494	}
   495	
   496	// peek reports whether the next character is in the ok string, without consuming it.
   497	func (s *ss) peek(ok string) bool {
   498		r := s.getRune()
   499		if r != eof {
   500			s.UnreadRune()
   501		}
   502		return indexRune(ok, r) >= 0
   503	}
   504	
   505	func (s *ss) notEOF() {
   506		// Guarantee there is data to be read.
   507		if r := s.getRune(); r == eof {
   508			panic(io.EOF)
   509		}
   510		s.UnreadRune()
   511	}
   512	
   513	// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
   514	// buffer and returns true. Otherwise it return false.
   515	func (s *ss) accept(ok string) bool {
   516		return s.consume(ok, true)
   517	}
   518	
   519	// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
   520	func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
   521		for _, v := range okVerbs {
   522			if v == verb {
   523				return true
   524			}
   525		}
   526		s.errorString("bad verb '%" + string(verb) + "' for " + typ)
   527		return false
   528	}
   529	
   530	// scanBool returns the value of the boolean represented by the next token.
   531	func (s *ss) scanBool(verb rune) bool {
   532		s.SkipSpace()
   533		s.notEOF()
   534		if !s.okVerb(verb, "tv", "boolean") {
   535			return false
   536		}
   537		// Syntax-checking a boolean is annoying. We're not fastidious about case.
   538		switch s.getRune() {
   539		case '0':
   540			return false
   541		case '1':
   542			return true
   543		case 't', 'T':
   544			if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
   545				s.error(boolError)
   546			}
   547			return true
   548		case 'f', 'F':
   549			if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
   550				s.error(boolError)
   551			}
   552			return false
   553		}
   554		return false
   555	}
   556	
// Numerical elements.
// Each constant is a set of characters handed to accept/consume/peek,
// which match the next input rune against the runes in the string:
// the digit sets for each base, the sign characters, the decimal point,
// and the exponent markers (decimal 'e'/'E' and binary 'p'/'P').
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEpP"
)
   567	
   568	// getBase returns the numeric base represented by the verb and its digit string.
   569	func (s *ss) getBase(verb rune) (base int, digits string) {
   570		s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
   571		base = 10
   572		digits = decimalDigits
   573		switch verb {
   574		case 'b':
   575			base = 2
   576			digits = binaryDigits
   577		case 'o':
   578			base = 8
   579			digits = octalDigits
   580		case 'x', 'X', 'U':
   581			base = 16
   582			digits = hexadecimalDigits
   583		}
   584		return
   585	}
   586	
   587	// scanNumber returns the numerical string with specified digits starting here.
   588	func (s *ss) scanNumber(digits string, haveDigits bool) string {
   589		if !haveDigits {
   590			s.notEOF()
   591			if !s.accept(digits) {
   592				s.errorString("expected integer")
   593			}
   594		}
   595		for s.accept(digits) {
   596		}
   597		return string(s.buf)
   598	}
   599	
   600	// scanRune returns the next rune value in the input.
   601	func (s *ss) scanRune(bitSize int) int64 {
   602		s.notEOF()
   603		r := int64(s.getRune())
   604		n := uint(bitSize)
   605		x := (r << (64 - n)) >> (64 - n)
   606		if x != r {
   607			s.errorString("overflow on character value " + string(r))
   608		}
   609		return r
   610	}
   611	
   612	// scanBasePrefix reports whether the integer begins with a base prefix
   613	// and returns the base, digit string, and whether a zero was found.
   614	// It is called only if the verb is %v.
   615	func (s *ss) scanBasePrefix() (base int, digits string, zeroFound bool) {
   616		if !s.peek("0") {
   617			return 0, decimalDigits + "_", false
   618		}
   619		s.accept("0")
   620		// Special cases for 0, 0b, 0o, 0x.
   621		switch {
   622		case s.peek("bB"):
   623			s.consume("bB", true)
   624			return 0, binaryDigits + "_", true
   625		case s.peek("oO"):
   626			s.consume("oO", true)
   627			return 0, octalDigits + "_", true
   628		case s.peek("xX"):
   629			s.consume("xX", true)
   630			return 0, hexadecimalDigits + "_", true
   631		default:
   632			return 0, octalDigits + "_", true
   633		}
   634	}
   635	
   636	// scanInt returns the value of the integer represented by the next
   637	// token, checking for overflow. Any error is stored in s.err.
   638	func (s *ss) scanInt(verb rune, bitSize int) int64 {
   639		if verb == 'c' {
   640			return s.scanRune(bitSize)
   641		}
   642		s.SkipSpace()
   643		s.notEOF()
   644		base, digits := s.getBase(verb)
   645		haveDigits := false
   646		if verb == 'U' {
   647			if !s.consume("U", false) || !s.consume("+", false) {
   648				s.errorString("bad unicode format ")
   649			}
   650		} else {
   651			s.accept(sign) // If there's a sign, it will be left in the token buffer.
   652			if verb == 'v' {
   653				base, digits, haveDigits = s.scanBasePrefix()
   654			}
   655		}
   656		tok := s.scanNumber(digits, haveDigits)
   657		i, err := strconv.ParseInt(tok, base, 64)
   658		if err != nil {
   659			s.error(err)
   660		}
   661		n := uint(bitSize)
   662		x := (i << (64 - n)) >> (64 - n)
   663		if x != i {
   664			s.errorString("integer overflow on token " + tok)
   665		}
   666		return i
   667	}
   668	
   669	// scanUint returns the value of the unsigned integer represented
   670	// by the next token, checking for overflow. Any error is stored in s.err.
   671	func (s *ss) scanUint(verb rune, bitSize int) uint64 {
   672		if verb == 'c' {
   673			return uint64(s.scanRune(bitSize))
   674		}
   675		s.SkipSpace()
   676		s.notEOF()
   677		base, digits := s.getBase(verb)
   678		haveDigits := false
   679		if verb == 'U' {
   680			if !s.consume("U", false) || !s.consume("+", false) {
   681				s.errorString("bad unicode format ")
   682			}
   683		} else if verb == 'v' {
   684			base, digits, haveDigits = s.scanBasePrefix()
   685		}
   686		tok := s.scanNumber(digits, haveDigits)
   687		i, err := strconv.ParseUint(tok, base, 64)
   688		if err != nil {
   689			s.error(err)
   690		}
   691		n := uint(bitSize)
   692		x := (i << (64 - n)) >> (64 - n)
   693		if x != i {
   694			s.errorString("unsigned integer overflow on token " + tok)
   695		}
   696		return i
   697	}
   698	
   699	// floatToken returns the floating-point number starting here, no longer than swid
   700	// if the width is specified. It's not rigorous about syntax because it doesn't check that
   701	// we have at least some digits, but Atof will do that.
   702	func (s *ss) floatToken() string {
   703		s.buf = s.buf[:0]
   704		// NaN?
   705		if s.accept("nN") && s.accept("aA") && s.accept("nN") {
   706			return string(s.buf)
   707		}
   708		// leading sign?
   709		s.accept(sign)
   710		// Inf?
   711		if s.accept("iI") && s.accept("nN") && s.accept("fF") {
   712			return string(s.buf)
   713		}
   714		digits := decimalDigits + "_"
   715		exp := exponent
   716		if s.accept("0") && s.accept("xX") {
   717			digits = hexadecimalDigits + "_"
   718			exp = "pP"
   719		}
   720		// digits?
   721		for s.accept(digits) {
   722		}
   723		// decimal point?
   724		if s.accept(period) {
   725			// fraction?
   726			for s.accept(digits) {
   727			}
   728		}
   729		// exponent?
   730		if s.accept(exp) {
   731			// leading sign?
   732			s.accept(sign)
   733			// digits?
   734			for s.accept(decimalDigits + "_") {
   735			}
   736		}
   737		return string(s.buf)
   738	}
   739	
   740	// complexTokens returns the real and imaginary parts of the complex number starting here.
   741	// The number might be parenthesized and has the format (N+Ni) where N is a floating-point
   742	// number and there are no spaces within.
   743	func (s *ss) complexTokens() (real, imag string) {
   744		// TODO: accept N and Ni independently?
   745		parens := s.accept("(")
   746		real = s.floatToken()
   747		s.buf = s.buf[:0]
   748		// Must now have a sign.
   749		if !s.accept("+-") {
   750			s.error(complexError)
   751		}
   752		// Sign is now in buffer
   753		imagSign := string(s.buf)
   754		imag = s.floatToken()
   755		if !s.accept("i") {
   756			s.error(complexError)
   757		}
   758		if parens && !s.accept(")") {
   759			s.error(complexError)
   760		}
   761		return real, imagSign + imag
   762	}
   763	
   764	func hasX(s string) bool {
   765		for i := 0; i < len(s); i++ {
   766			if s[i] == 'x' || s[i] == 'X' {
   767				return true
   768			}
   769		}
   770		return false
   771	}
   772	
   773	// convertFloat converts the string to a float64value.
   774	func (s *ss) convertFloat(str string, n int) float64 {
   775		// strconv.ParseFloat will handle "+0x1.fp+2",
   776		// but we have to implement our non-standard
   777		// decimal+binary exponent mix (1.2p4) ourselves.
   778		if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
   779			// Atof doesn't handle power-of-2 exponents,
   780			// but they're easy to evaluate.
   781			f, err := strconv.ParseFloat(str[:p], n)
   782			if err != nil {
   783				// Put full string into error.
   784				if e, ok := err.(*strconv.NumError); ok {
   785					e.Num = str
   786				}
   787				s.error(err)
   788			}
   789			m, err := strconv.Atoi(str[p+1:])
   790			if err != nil {
   791				// Put full string into error.
   792				if e, ok := err.(*strconv.NumError); ok {
   793					e.Num = str
   794				}
   795				s.error(err)
   796			}
   797			return math.Ldexp(f, m)
   798		}
   799		f, err := strconv.ParseFloat(str, n)
   800		if err != nil {
   801			s.error(err)
   802		}
   803		return f
   804	}
   805	
   806	// convertComplex converts the next token to a complex128 value.
   807	// The atof argument is a type-specific reader for the underlying type.
   808	// If we're reading complex64, atof will parse float32s and convert them
   809	// to float64's to avoid reproducing this code for each complex type.
   810	func (s *ss) scanComplex(verb rune, n int) complex128 {
   811		if !s.okVerb(verb, floatVerbs, "complex") {
   812			return 0
   813		}
   814		s.SkipSpace()
   815		s.notEOF()
   816		sreal, simag := s.complexTokens()
   817		real := s.convertFloat(sreal, n/2)
   818		imag := s.convertFloat(simag, n/2)
   819		return complex(real, imag)
   820	}
   821	
   822	// convertString returns the string represented by the next input characters.
   823	// The format of the input is determined by the verb.
   824	func (s *ss) convertString(verb rune) (str string) {
   825		if !s.okVerb(verb, "svqxX", "string") {
   826			return ""
   827		}
   828		s.SkipSpace()
   829		s.notEOF()
   830		switch verb {
   831		case 'q':
   832			str = s.quotedString()
   833		case 'x', 'X':
   834			str = s.hexString()
   835		default:
   836			str = string(s.token(true, notSpace)) // %s and %v just return the next word
   837		}
   838		return
   839	}
   840	
   841	// quotedString returns the double- or back-quoted string represented by the next input characters.
   842	func (s *ss) quotedString() string {
   843		s.notEOF()
   844		quote := s.getRune()
   845		switch quote {
   846		case '`':
   847			// Back-quoted: Anything goes until EOF or back quote.
   848			for {
   849				r := s.mustReadRune()
   850				if r == quote {
   851					break
   852				}
   853				s.buf.writeRune(r)
   854			}
   855			return string(s.buf)
   856		case '"':
   857			// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
   858			s.buf.writeByte('"')
   859			for {
   860				r := s.mustReadRune()
   861				s.buf.writeRune(r)
   862				if r == '\\' {
   863					// In a legal backslash escape, no matter how long, only the character
   864					// immediately after the escape can itself be a backslash or quote.
   865					// Thus we only need to protect the first character after the backslash.
   866					s.buf.writeRune(s.mustReadRune())
   867				} else if r == '"' {
   868					break
   869				}
   870			}
   871			result, err := strconv.Unquote(string(s.buf))
   872			if err != nil {
   873				s.error(err)
   874			}
   875			return result
   876		default:
   877			s.errorString("expected quoted string")
   878		}
   879		return ""
   880	}
   881	
   882	// hexDigit returns the value of the hexadecimal digit.
   883	func hexDigit(d rune) (int, bool) {
   884		digit := int(d)
   885		switch digit {
   886		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   887			return digit - '0', true
   888		case 'a', 'b', 'c', 'd', 'e', 'f':
   889			return 10 + digit - 'a', true
   890		case 'A', 'B', 'C', 'D', 'E', 'F':
   891			return 10 + digit - 'A', true
   892		}
   893		return -1, false
   894	}
   895	
   896	// hexByte returns the next hex-encoded (two-character) byte from the input.
   897	// It returns ok==false if the next bytes in the input do not encode a hex byte.
   898	// If the first byte is hex and the second is not, processing stops.
   899	func (s *ss) hexByte() (b byte, ok bool) {
   900		rune1 := s.getRune()
   901		if rune1 == eof {
   902			return
   903		}
   904		value1, ok := hexDigit(rune1)
   905		if !ok {
   906			s.UnreadRune()
   907			return
   908		}
   909		value2, ok := hexDigit(s.mustReadRune())
   910		if !ok {
   911			s.errorString("illegal hex digit")
   912			return
   913		}
   914		return byte(value1<<4 | value2), true
   915	}
   916	
   917	// hexString returns the space-delimited hexpair-encoded string.
   918	func (s *ss) hexString() string {
   919		s.notEOF()
   920		for {
   921			b, ok := s.hexByte()
   922			if !ok {
   923				break
   924			}
   925			s.buf.writeByte(b)
   926		}
   927		if len(s.buf) == 0 {
   928			s.errorString("no hex data for %x string")
   929			return ""
   930		}
   931		return string(s.buf)
   932	}
   933	
// floatVerbs lists the verbs accepted when scanning floating-point and
// complex values.
//
// hugeWid is the value stored in the width and limit fields of a scan
// state when no width is in effect.
//
// intBits and uintptrBits are the sizes in bits of int/uint and uintptr
// on the target platform: shifting the all-ones value right by 63 gives
// 1 only when the type is 64 bits wide, so each expression is 32 or 64.
const (
	floatVerbs = "beEfFgGv"

	hugeWid = 1 << 30

	intBits     = 32 << (^uint(0) >> 63)
	uintptrBits = 32 << (^uintptr(0) >> 63)
)
   942	
   943	// scanOne scans a single value, deriving the scanner from the type of the argument.
   944	func (s *ss) scanOne(verb rune, arg interface{}) {
   945		s.buf = s.buf[:0]
   946		var err error
   947		// If the parameter has its own Scan method, use that.
   948		if v, ok := arg.(Scanner); ok {
   949			err = v.Scan(s, verb)
   950			if err != nil {
   951				if err == io.EOF {
   952					err = io.ErrUnexpectedEOF
   953				}
   954				s.error(err)
   955			}
   956			return
   957		}
   958	
   959		switch v := arg.(type) {
   960		case *bool:
   961			*v = s.scanBool(verb)
   962		case *complex64:
   963			*v = complex64(s.scanComplex(verb, 64))
   964		case *complex128:
   965			*v = s.scanComplex(verb, 128)
   966		case *int:
   967			*v = int(s.scanInt(verb, intBits))
   968		case *int8:
   969			*v = int8(s.scanInt(verb, 8))
   970		case *int16:
   971			*v = int16(s.scanInt(verb, 16))
   972		case *int32:
   973			*v = int32(s.scanInt(verb, 32))
   974		case *int64:
   975			*v = s.scanInt(verb, 64)
   976		case *uint:
   977			*v = uint(s.scanUint(verb, intBits))
   978		case *uint8:
   979			*v = uint8(s.scanUint(verb, 8))
   980		case *uint16:
   981			*v = uint16(s.scanUint(verb, 16))
   982		case *uint32:
   983			*v = uint32(s.scanUint(verb, 32))
   984		case *uint64:
   985			*v = s.scanUint(verb, 64)
   986		case *uintptr:
   987			*v = uintptr(s.scanUint(verb, uintptrBits))
   988		// Floats are tricky because you want to scan in the precision of the result, not
   989		// scan in high precision and convert, in order to preserve the correct error condition.
   990		case *float32:
   991			if s.okVerb(verb, floatVerbs, "float32") {
   992				s.SkipSpace()
   993				s.notEOF()
   994				*v = float32(s.convertFloat(s.floatToken(), 32))
   995			}
   996		case *float64:
   997			if s.okVerb(verb, floatVerbs, "float64") {
   998				s.SkipSpace()
   999				s.notEOF()
  1000				*v = s.convertFloat(s.floatToken(), 64)
  1001			}
  1002		case *string:
  1003			*v = s.convertString(verb)
  1004		case *[]byte:
  1005			// We scan to string and convert so we get a copy of the data.
  1006			// If we scanned to bytes, the slice would point at the buffer.
  1007			*v = []byte(s.convertString(verb))
  1008		default:
  1009			val := reflect.ValueOf(v)
  1010			ptr := val
  1011			if ptr.Kind() != reflect.Ptr {
  1012				s.errorString("type not a pointer: " + val.Type().String())
  1013				return
  1014			}
  1015			switch v := ptr.Elem(); v.Kind() {
  1016			case reflect.Bool:
  1017				v.SetBool(s.scanBool(verb))
  1018			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  1019				v.SetInt(s.scanInt(verb, v.Type().Bits()))
  1020			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  1021				v.SetUint(s.scanUint(verb, v.Type().Bits()))
  1022			case reflect.String:
  1023				v.SetString(s.convertString(verb))
  1024			case reflect.Slice:
  1025				// For now, can only handle (renamed) []byte.
  1026				typ := v.Type()
  1027				if typ.Elem().Kind() != reflect.Uint8 {
  1028					s.errorString("can't scan type: " + val.Type().String())
  1029				}
  1030				str := s.convertString(verb)
  1031				v.Set(reflect.MakeSlice(typ, len(str), len(str)))
  1032				for i := 0; i < len(str); i++ {
  1033					v.Index(i).SetUint(uint64(str[i]))
  1034				}
  1035			case reflect.Float32, reflect.Float64:
  1036				s.SkipSpace()
  1037				s.notEOF()
  1038				v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
  1039			case reflect.Complex64, reflect.Complex128:
  1040				v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
  1041			default:
  1042				s.errorString("can't scan type: " + val.Type().String())
  1043			}
  1044		}
  1045	}
  1046	
  1047	// errorHandler turns local panics into error returns.
  1048	func errorHandler(errp *error) {
  1049		if e := recover(); e != nil {
  1050			if se, ok := e.(scanError); ok { // catch local error
  1051				*errp = se.err
  1052			} else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
  1053				*errp = eof
  1054			} else {
  1055				panic(e)
  1056			}
  1057		}
  1058	}
  1059	
  1060	// doScan does the real work for scanning without a format string.
  1061	func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
  1062		defer errorHandler(&err)
  1063		for _, arg := range a {
  1064			s.scanOne('v', arg)
  1065			numProcessed++
  1066		}
  1067		// Check for newline (or EOF) if required (Scanln etc.).
  1068		if s.nlIsEnd {
  1069			for {
  1070				r := s.getRune()
  1071				if r == '\n' || r == eof {
  1072					break
  1073				}
  1074				if !isSpace(r) {
  1075					s.errorString("expected newline")
  1076					break
  1077				}
  1078			}
  1079		}
  1080		return
  1081	}
  1082	
  1083	// advance determines whether the next characters in the input match
  1084	// those of the format. It returns the number of bytes (sic) consumed
  1085	// in the format. All runs of space characters in either input or
  1086	// format behave as a single space. Newlines are special, though:
  1087	// newlines in the format must match those in the input and vice versa.
  1088	// This routine also handles the %% case. If the return value is zero,
  1089	// either format starts with a % (with no following %) or the input
  1090	// is empty. If it is negative, the input did not match the string.
  1091	func (s *ss) advance(format string) (i int) {
  1092		for i < len(format) {
  1093			fmtc, w := utf8.DecodeRuneInString(format[i:])
  1094	
  1095			// Space processing.
  1096			// In the rest of this comment "space" means spaces other than newline.
  1097			// Newline in the format matches input of zero or more spaces and then newline or end-of-input.
  1098			// Spaces in the format before the newline are collapsed into the newline.
  1099			// Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
  1100			// Other spaces in the format match input of one or more spaces or end-of-input.
  1101			if isSpace(fmtc) {
  1102				newlines := 0
  1103				trailingSpace := false
  1104				for isSpace(fmtc) && i < len(format) {
  1105					if fmtc == '\n' {
  1106						newlines++
  1107						trailingSpace = false
  1108					} else {
  1109						trailingSpace = true
  1110					}
  1111					i += w
  1112					fmtc, w = utf8.DecodeRuneInString(format[i:])
  1113				}
  1114				for j := 0; j < newlines; j++ {
  1115					inputc := s.getRune()
  1116					for isSpace(inputc) && inputc != '\n' {
  1117						inputc = s.getRune()
  1118					}
  1119					if inputc != '\n' && inputc != eof {
  1120						s.errorString("newline in format does not match input")
  1121					}
  1122				}
  1123				if trailingSpace {
  1124					inputc := s.getRune()
  1125					if newlines == 0 {
  1126						// If the trailing space stood alone (did not follow a newline),
  1127						// it must find at least one space to consume.
  1128						if !isSpace(inputc) && inputc != eof {
  1129							s.errorString("expected space in input to match format")
  1130						}
  1131						if inputc == '\n' {
  1132							s.errorString("newline in input does not match format")
  1133						}
  1134					}
  1135					for isSpace(inputc) && inputc != '\n' {
  1136						inputc = s.getRune()
  1137					}
  1138					if inputc != eof {
  1139						s.UnreadRune()
  1140					}
  1141				}
  1142				continue
  1143			}
  1144	
  1145			// Verbs.
  1146			if fmtc == '%' {
  1147				// % at end of string is an error.
  1148				if i+w == len(format) {
  1149					s.errorString("missing verb: % at end of format string")
  1150				}
  1151				// %% acts like a real percent
  1152				nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
  1153				if nextc != '%' {
  1154					return
  1155				}
  1156				i += w // skip the first %
  1157			}
  1158	
  1159			// Literals.
  1160			inputc := s.mustReadRune()
  1161			if fmtc != inputc {
  1162				s.UnreadRune()
  1163				return -1
  1164			}
  1165			i += w
  1166		}
  1167		return
  1168	}
  1169	
  1170	// doScanf does the real work when scanning with a format string.
  1171	// At the moment, it handles only pointers to basic types.
  1172	func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
  1173		defer errorHandler(&err)
  1174		end := len(format) - 1
  1175		// We process one item per non-trivial format
  1176		for i := 0; i <= end; {
  1177			w := s.advance(format[i:])
  1178			if w > 0 {
  1179				i += w
  1180				continue
  1181			}
  1182			// Either we failed to advance, we have a percent character, or we ran out of input.
  1183			if format[i] != '%' {
  1184				// Can't advance format. Why not?
  1185				if w < 0 {
  1186					s.errorString("input does not match format")
  1187				}
  1188				// Otherwise at EOF; "too many operands" error handled below
  1189				break
  1190			}
  1191			i++ // % is one byte
  1192	
  1193			// do we have 20 (width)?
  1194			var widPresent bool
  1195			s.maxWid, widPresent, i = parsenum(format, i, end)
  1196			if !widPresent {
  1197				s.maxWid = hugeWid
  1198			}
  1199	
  1200			c, w := utf8.DecodeRuneInString(format[i:])
  1201			i += w
  1202	
  1203			if c != 'c' {
  1204				s.SkipSpace()
  1205			}
  1206			s.argLimit = s.limit
  1207			if f := s.count + s.maxWid; f < s.argLimit {
  1208				s.argLimit = f
  1209			}
  1210	
  1211			if numProcessed >= len(a) { // out of operands
  1212				s.errorString("too few operands for format '%" + format[i-w:] + "'")
  1213				break
  1214			}
  1215			arg := a[numProcessed]
  1216	
  1217			s.scanOne(c, arg)
  1218			numProcessed++
  1219			s.argLimit = s.limit
  1220		}
  1221		if numProcessed < len(a) {
  1222			s.errorString("too many operands")
  1223		}
  1224		return
  1225	}
  1226
View as plain text