Source file src/pkg/text/scanner/scanner.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package scanner provides a scanner and tokenizer for UTF-8-encoded text.
     6	// It takes an io.Reader providing the source, which then can be tokenized
     7	// through repeated calls to the Scan function. For compatibility with
     8	// existing tools, the NUL character is not allowed. If the first character
     9	// in the source is a UTF-8 encoded byte order mark (BOM), it is discarded.
    10	//
    11	// By default, a Scanner skips white space and Go comments and recognizes all
    12	// literals as defined by the Go language specification. It may be
    13	// customized to recognize only a subset of those literals and to recognize
    14	// different identifier and white space characters.
    15	package scanner
    16	
    17	import (
    18		"bytes"
    19		"fmt"
    20		"io"
    21		"os"
    22		"unicode"
    23		"unicode/utf8"
    24	)
    25	
    26	// A source position is represented by a Position value.
    27	// A position is valid if Line > 0.
    28	type Position struct {
    29		Filename string // filename, if any
    30		Offset   int    // byte offset, starting at 0
    31		Line     int    // line number, starting at 1
    32		Column   int    // column number, starting at 1 (character count per line)
    33	}
    34	
    35	// IsValid reports whether the position is valid.
    36	func (pos *Position) IsValid() bool { return pos.Line > 0 }
    37	
    38	func (pos Position) String() string {
    39		s := pos.Filename
    40		if s == "" {
    41			s = "<input>"
    42		}
    43		if pos.IsValid() {
    44			s += fmt.Sprintf(":%d:%d", pos.Line, pos.Column)
    45		}
    46		return s
    47	}
    48	
// Predefined mode bits to control recognition of tokens. For instance,
// to configure a Scanner such that it only recognizes (Go) identifiers,
// integers, and skips comments, set the Scanner's Mode field to:
//
//	ScanIdents | ScanInts | ScanComments | SkipComments
//
// (SkipComments only has an effect together with ScanComments; without
// ScanComments the characters of a comment are scanned like any other
// input.)
//
// With the exception of comments, which are skipped if both ScanComments
// and SkipComments are set, unrecognized tokens are not ignored. Instead,
// the scanner simply returns the respective individual characters (or
// possibly sub-tokens). For instance, if the mode is ScanIdents (not
// ScanStrings), the string "foo" is scanned as the token sequence
// '"' Ident '"'.
//
// Use GoTokens to configure the Scanner such that it accepts all Go
// literal tokens including Go identifiers. Comments will be skipped.
//
// The mode bit for a token t is 1<<-t: token values are negative, so
// negating them yields the bit position.
const (
	ScanIdents     = 1 << -Ident
	ScanInts       = 1 << -Int
	ScanFloats     = 1 << -Float // includes Ints and hexadecimal floats
	ScanChars      = 1 << -Char
	ScanStrings    = 1 << -String
	ScanRawStrings = 1 << -RawString
	ScanComments   = 1 << -Comment
	SkipComments   = 1 << -skipComment // if set with ScanComments, comments become white space
	GoTokens       = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
)
    75	
// The result of Scan is one of these tokens or a Unicode character.
// All token values are negative (EOF is -1, Ident is -2, and so on),
// which keeps them distinct from Unicode characters, which are >= 0.
const (
	EOF = -(iota + 1)
	Ident
	Int
	Float
	Char
	String
	RawString
	Comment

	// internal use only
	// (skipComment is not a token returned by Scan; it only provides
	// the bit position for the SkipComments mode bit)
	skipComment
)
    90	
    91	var tokenString = map[rune]string{
    92		EOF:       "EOF",
    93		Ident:     "Ident",
    94		Int:       "Int",
    95		Float:     "Float",
    96		Char:      "Char",
    97		String:    "String",
    98		RawString: "RawString",
    99		Comment:   "Comment",
   100	}
   101	
   102	// TokenString returns a printable string for a token or Unicode character.
   103	func TokenString(tok rune) string {
   104		if s, found := tokenString[tok]; found {
   105			return s
   106		}
   107		return fmt.Sprintf("%q", string(tok))
   108	}
   109	
// GoWhitespace is the default value for the Scanner's Whitespace field.
// Its value selects Go's white space characters: tab, newline,
// carriage return, and space. Bit ch of the value is set for each
// selected character ch.
const GoWhitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '

// bufLen is the size of the Scanner's source buffer in bytes, not
// counting the one extra byte reserved for the sentinel.
const bufLen = 1024 // at least utf8.UTFMax
   115	
// A Scanner implements reading of Unicode characters and tokens from an io.Reader.
// A Scanner must be set up with Init before use. The exported fields
// configure error reporting, token recognition, white space, and
// identifier syntax; the embedded Position holds the start of the most
// recently scanned token.
type Scanner struct {
	// Input
	src io.Reader

	// Source buffer
	// (the byte at srcBuf[srcEnd] always holds the sentinel utf8.RuneSelf)
	srcBuf [bufLen + 1]byte // +1 for sentinel for common case of s.next()
	srcPos int              // reading position (srcBuf index)
	srcEnd int              // source end (srcBuf index)

	// Source position
	srcBufOffset int // byte offset of srcBuf[0] in source
	line         int // line count
	column       int // character count
	lastLineLen  int // length of last line in characters (for correct column reporting)
	lastCharLen  int // length of last character in bytes

	// Token text buffer
	// Typically, token text is stored completely in srcBuf, but in general
	// the token text's head may be buffered in tokBuf while the token text's
	// tail is stored in srcBuf.
	tokBuf bytes.Buffer // token text head that is not in srcBuf anymore
	tokPos int          // token text tail position (srcBuf index); valid if >= 0
	tokEnd int          // token text tail end (srcBuf index)

	// One character look-ahead
	// (-2 means that no character has been read yet; see Init and Peek)
	ch rune // character before current srcPos

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	Error func(s *Scanner, msg string)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int

	// The Mode field controls which tokens are recognized. For instance,
	// to recognize Ints, set the ScanInts bit in Mode. The field may be
	// changed at any time.
	Mode uint

	// The Whitespace field controls which characters are recognized
	// as white space. To recognize a character ch <= ' ' as white space,
	// set the ch'th bit in Whitespace (the Scanner's behavior is undefined
	// for values ch > ' '). The field may be changed at any time.
	Whitespace uint64

	// IsIdentRune is a predicate controlling the characters accepted
	// as the ith rune in an identifier. The set of valid characters
	// must not intersect with the set of white space characters.
	// If no IsIdentRune function is set, regular Go identifiers are
	// accepted instead. The field may be changed at any time.
	IsIdentRune func(ch rune, i int) bool

	// Start position of most recently scanned token; set by Scan.
	// Calling Init or Next invalidates the position (Line == 0).
	// The Filename field is always left untouched by the Scanner.
	// If an error is reported (via Error) and Position is invalid,
	// the scanner is not inside a token. Call Pos to obtain an error
	// position in that case, or to obtain the position immediately
	// after the most recently scanned token.
	Position
}
   178	
   179	// Init initializes a Scanner with a new source and returns s.
   180	// Error is set to nil, ErrorCount is set to 0, Mode is set to GoTokens,
   181	// and Whitespace is set to GoWhitespace.
   182	func (s *Scanner) Init(src io.Reader) *Scanner {
   183		s.src = src
   184	
   185		// initialize source buffer
   186		// (the first call to next() will fill it by calling src.Read)
   187		s.srcBuf[0] = utf8.RuneSelf // sentinel
   188		s.srcPos = 0
   189		s.srcEnd = 0
   190	
   191		// initialize source position
   192		s.srcBufOffset = 0
   193		s.line = 1
   194		s.column = 0
   195		s.lastLineLen = 0
   196		s.lastCharLen = 0
   197	
   198		// initialize token text buffer
   199		// (required for first call to next()).
   200		s.tokPos = -1
   201	
   202		// initialize one character look-ahead
   203		s.ch = -2 // no char read yet, not EOF
   204	
   205		// initialize public fields
   206		s.Error = nil
   207		s.ErrorCount = 0
   208		s.Mode = GoTokens
   209		s.Whitespace = GoWhitespace
   210		s.Line = 0 // invalidate token position
   211	
   212		return s
   213	}
   214	
// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
//
// The byte just past the buffered data (srcBuf[srcEnd]) always holds the
// sentinel utf8.RuneSelf, so the single test ch >= utf8.RuneSelf catches
// both a non-ASCII byte and the end of the buffered data.
//
// next returns EOF once the source is exhausted. It reports read errors
// other than io.EOF, invalid UTF-8 encodings, and NUL characters via
// s.error. It updates srcPos, lastCharLen, column, and, for newlines,
// line and lastLineLen.
func (s *Scanner) next() rune {
	ch, width := rune(s.srcBuf[s.srcPos]), 1

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				// the remaining token text starts at the beginning
				// of the buffer once the unread bytes are moved
				s.tokPos = 0
				// s.tokEnd is set by Scan()
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			// (an io.Reader must return io.EOF when it reaches
			// the end of what it is reading - simply returning
			// n == 0 will make this loop retry forever; but the
			// error is in the reader implementation in that case)
			i := s.srcEnd - s.srcPos // number of unread bytes kept
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcPos = 0
			s.srcEnd = i + n
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				if err != io.EOF {
					s.error(err.Error())
				}
				if s.srcEnd == 0 {
					// nothing left to decode: report EOF
					if s.lastCharLen > 0 {
						// previous character was not EOF
						s.column++
					}
					s.lastCharLen = 0
					return EOF
				}
				// If err == EOF, we won't be getting more
				// bytes; break to avoid infinite loop. If
				// err is something else, we don't know if
				// we can get more bytes; thus also break.
				break
			}
		}
		// at least one byte
		ch = rune(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			if ch == utf8.RuneError && width == 1 {
				// advance for correct error position
				s.srcPos += width
				s.lastCharLen = width
				s.column++
				s.error("invalid UTF-8 encoding")
				return ch
			}
		}
	}

	// advance
	s.srcPos += width
	s.lastCharLen = width
	s.column++

	// special situations
	switch ch {
	case 0:
		// for compatibility with other tools
		s.error("invalid character NUL")
	case '\n':
		// start a new line, remembering the length of the line just
		// finished so that positions before the newline can be reported
		s.line++
		s.lastLineLen = s.column
		s.column = 0
	}

	return ch
}
   298	
   299	// Next reads and returns the next Unicode character.
   300	// It returns EOF at the end of the source. It reports
   301	// a read error by calling s.Error, if not nil; otherwise
   302	// it prints an error message to os.Stderr. Next does not
   303	// update the Scanner's Position field; use Pos() to
   304	// get the current position.
   305	func (s *Scanner) Next() rune {
   306		s.tokPos = -1 // don't collect token text
   307		s.Line = 0    // invalidate token position
   308		ch := s.Peek()
   309		if ch != EOF {
   310			s.ch = s.next()
   311		}
   312		return ch
   313	}
   314	
   315	// Peek returns the next Unicode character in the source without advancing
   316	// the scanner. It returns EOF if the scanner's position is at the last
   317	// character of the source.
   318	func (s *Scanner) Peek() rune {
   319		if s.ch == -2 {
   320			// this code is only run for the very first character
   321			s.ch = s.next()
   322			if s.ch == '\uFEFF' {
   323				s.ch = s.next() // ignore BOM
   324			}
   325		}
   326		return s.ch
   327	}
   328	
   329	func (s *Scanner) error(msg string) {
   330		s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
   331		s.ErrorCount++
   332		if s.Error != nil {
   333			s.Error(s, msg)
   334			return
   335		}
   336		pos := s.Position
   337		if !pos.IsValid() {
   338			pos = s.Pos()
   339		}
   340		fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
   341	}
   342	
   343	func (s *Scanner) errorf(format string, args ...interface{}) {
   344		s.error(fmt.Sprintf(format, args...))
   345	}
   346	
   347	func (s *Scanner) isIdentRune(ch rune, i int) bool {
   348		if s.IsIdentRune != nil {
   349			return s.IsIdentRune(ch, i)
   350		}
   351		return ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) && i > 0
   352	}
   353	
   354	func (s *Scanner) scanIdentifier() rune {
   355		// we know the zero'th rune is OK; start scanning at the next one
   356		ch := s.next()
   357		for i := 1; s.isIdentRune(ch, i); i++ {
   358			ch = s.next()
   359		}
   360		return ch
   361	}
   362	
   363	func lower(ch rune) rune     { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
   364	func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
   365	func isHex(ch rune) bool     { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
   366	
   367	// digits accepts the sequence { digit | '_' } starting with ch0.
   368	// If base <= 10, digits accepts any decimal digit but records
   369	// the first invalid digit >= base in *invalid if *invalid == 0.
   370	// digits returns the first rune that is not part of the sequence
   371	// anymore, and a bitset describing whether the sequence contained
   372	// digits (bit 0 is set), or separators '_' (bit 1 is set).
   373	func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int) {
   374		ch = ch0
   375		if base <= 10 {
   376			max := rune('0' + base)
   377			for isDecimal(ch) || ch == '_' {
   378				ds := 1
   379				if ch == '_' {
   380					ds = 2
   381				} else if ch >= max && *invalid == 0 {
   382					*invalid = ch
   383				}
   384				digsep |= ds
   385				ch = s.next()
   386			}
   387		} else {
   388			for isHex(ch) || ch == '_' {
   389				ds := 1
   390				if ch == '_' {
   391					ds = 2
   392				}
   393				digsep |= ds
   394				ch = s.next()
   395			}
   396		}
   397		return
   398	}
   399	
// scanNumber scans a number literal. ch is the first character of the
// literal that has not been processed yet. If seenDot is set, a '.' has
// already been consumed by the caller and ch starts the fractional part.
//
// It returns the token (Int, or Float if a radix point or an exponent
// was scanned) and the first character following the literal. Malformed
// literals are reported via s.error/s.errorf; scanning continues
// nevertheless.
func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
	base := 10         // number base
	prefix := rune(0)  // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
	digsep := 0        // bit 0: digit present, bit 1: '_' present
	invalid := rune(0) // invalid digit in literal, or 0

	// integer part
	var tok rune
	var ds int
	if !seenDot {
		tok = Int
		if ch == '0' {
			// a leading '0' may introduce a base prefix
			ch = s.next()
			switch lower(ch) {
			case 'x':
				ch = s.next()
				base, prefix = 16, 'x'
			case 'o':
				ch = s.next()
				base, prefix = 8, 'o'
			case 'b':
				ch = s.next()
				base, prefix = 2, 'b'
			default:
				base, prefix = 8, '0'
				digsep = 1 // leading 0
			}
		}
		ch, ds = s.digits(ch, base, &invalid)
		digsep |= ds
		// a radix point only belongs to the number if floats are enabled
		if ch == '.' && s.Mode&ScanFloats != 0 {
			ch = s.next()
			seenDot = true
		}
	}

	// fractional part
	if seenDot {
		tok = Float
		if prefix == 'o' || prefix == 'b' {
			s.error("invalid radix point in " + litname(prefix))
		}
		ch, ds = s.digits(ch, base, &invalid)
		digsep |= ds
	}

	if digsep&1 == 0 {
		s.error(litname(prefix) + " has no digits")
	}

	// exponent
	if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
		switch {
		case e == 'e' && prefix != 0 && prefix != '0':
			s.errorf("%q exponent requires decimal mantissa", ch)
		case e == 'p' && prefix != 'x':
			s.errorf("%q exponent requires hexadecimal mantissa", ch)
		}
		ch = s.next()
		tok = Float
		if ch == '+' || ch == '-' {
			ch = s.next()
		}
		// invalid may be nil here: with base 10 no decimal digit is
		// ever >= base, so digits never dereferences it
		ch, ds = s.digits(ch, 10, nil)
		digsep |= ds
		if ds&1 == 0 {
			s.error("exponent has no digits")
		}
	} else if prefix == 'x' && tok == Float {
		s.error("hexadecimal mantissa requires a 'p' exponent")
	}

	// an out-of-range digit is only an error for integers
	if tok == Int && invalid != 0 {
		s.errorf("invalid digit %q in %s", invalid, litname(prefix))
	}

	// validate separators only if the literal contains any
	if digsep&2 != 0 {
		s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
		if i := invalidSep(s.TokenText()); i >= 0 {
			s.error("'_' must separate successive digits")
		}
	}

	return tok, ch
}
   485	
   486	func litname(prefix rune) string {
   487		switch prefix {
   488		default:
   489			return "decimal literal"
   490		case 'x':
   491			return "hexadecimal literal"
   492		case 'o', '0':
   493			return "octal literal"
   494		case 'b':
   495			return "binary literal"
   496		}
   497	}
   498	
   499	// invalidSep returns the index of the first invalid separator in x, or -1.
   500	func invalidSep(x string) int {
   501		x1 := ' ' // prefix char, we only care if it's 'x'
   502		d := '.'  // digit, one of '_', '0' (a digit), or '.' (anything else)
   503		i := 0
   504	
   505		// a prefix counts as a digit
   506		if len(x) >= 2 && x[0] == '0' {
   507			x1 = lower(rune(x[1]))
   508			if x1 == 'x' || x1 == 'o' || x1 == 'b' {
   509				d = '0'
   510				i = 2
   511			}
   512		}
   513	
   514		// mantissa and exponent
   515		for ; i < len(x); i++ {
   516			p := d // previous digit
   517			d = rune(x[i])
   518			switch {
   519			case d == '_':
   520				if p != '0' {
   521					return i
   522				}
   523			case isDecimal(d) || x1 == 'x' && isHex(d):
   524				d = '0'
   525			default:
   526				if p == '_' {
   527					return i - 1
   528				}
   529				d = '.'
   530			}
   531		}
   532		if d == '_' {
   533			return len(x) - 1
   534		}
   535	
   536		return -1
   537	}
   538	
   539	func digitVal(ch rune) int {
   540		switch {
   541		case '0' <= ch && ch <= '9':
   542			return int(ch - '0')
   543		case 'a' <= lower(ch) && lower(ch) <= 'f':
   544			return int(lower(ch) - 'a' + 10)
   545		}
   546		return 16 // larger than any legal digit val
   547	}
   548	
   549	func (s *Scanner) scanDigits(ch rune, base, n int) rune {
   550		for n > 0 && digitVal(ch) < base {
   551			ch = s.next()
   552			n--
   553		}
   554		if n > 0 {
   555			s.error("invalid char escape")
   556		}
   557		return ch
   558	}
   559	
   560	func (s *Scanner) scanEscape(quote rune) rune {
   561		ch := s.next() // read character after '/'
   562		switch ch {
   563		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   564			// nothing to do
   565			ch = s.next()
   566		case '0', '1', '2', '3', '4', '5', '6', '7':
   567			ch = s.scanDigits(ch, 8, 3)
   568		case 'x':
   569			ch = s.scanDigits(s.next(), 16, 2)
   570		case 'u':
   571			ch = s.scanDigits(s.next(), 16, 4)
   572		case 'U':
   573			ch = s.scanDigits(s.next(), 16, 8)
   574		default:
   575			s.error("invalid char escape")
   576		}
   577		return ch
   578	}
   579	
   580	func (s *Scanner) scanString(quote rune) (n int) {
   581		ch := s.next() // read character after quote
   582		for ch != quote {
   583			if ch == '\n' || ch < 0 {
   584				s.error("literal not terminated")
   585				return
   586			}
   587			if ch == '\\' {
   588				ch = s.scanEscape(quote)
   589			} else {
   590				ch = s.next()
   591			}
   592			n++
   593		}
   594		return
   595	}
   596	
   597	func (s *Scanner) scanRawString() {
   598		ch := s.next() // read character after '`'
   599		for ch != '`' {
   600			if ch < 0 {
   601				s.error("literal not terminated")
   602				return
   603			}
   604			ch = s.next()
   605		}
   606	}
   607	
   608	func (s *Scanner) scanChar() {
   609		if s.scanString('\'') != 1 {
   610			s.error("invalid char literal")
   611		}
   612	}
   613	
   614	func (s *Scanner) scanComment(ch rune) rune {
   615		// ch == '/' || ch == '*'
   616		if ch == '/' {
   617			// line comment
   618			ch = s.next() // read character after "//"
   619			for ch != '\n' && ch >= 0 {
   620				ch = s.next()
   621			}
   622			return ch
   623		}
   624	
   625		// general comment
   626		ch = s.next() // read character after "/*"
   627		for {
   628			if ch < 0 {
   629				s.error("comment not terminated")
   630				break
   631			}
   632			ch0 := ch
   633			ch = s.next()
   634			if ch0 == '*' && ch == '/' {
   635				ch = s.next()
   636				break
   637			}
   638		}
   639		return ch
   640	}
   641	
// Scan reads the next token or Unicode character from source and returns it.
// It only recognizes tokens t for which the respective Mode bit (1<<-t) is set.
// It returns EOF at the end of the source. It reports scanner errors (read and
// token errors) by calling s.Error, if not nil; otherwise it prints an error
// message to os.Stderr.
//
// Scan sets the Scanner's Position field to the start of the token and
// records the extent of the token text for TokenText.
func (s *Scanner) Scan() rune {
	ch := s.Peek()

	// reset token text position
	s.tokPos = -1
	s.Line = 0

redo:
	// skip white space
	for s.Whitespace&(1<<uint(ch)) != 0 {
		ch = s.next()
	}

	// start collecting token text
	// (ch has already been read, so the token starts one character
	// before the current reading position)
	s.tokBuf.Reset()
	s.tokPos = s.srcPos - s.lastCharLen

	// set token position
	// (this is a slightly optimized version of the code in Pos())
	s.Offset = s.srcBufOffset + s.tokPos
	if s.column > 0 {
		// common case: last character was not a '\n'
		s.Line = s.line
		s.Column = s.column
	} else {
		// last character was a '\n'
		// (we cannot be at the beginning of the source
		// since we have called next() at least once)
		s.Line = s.line - 1
		s.Column = s.lastLineLen
	}

	// determine token value
	// (if the mode bit for a token is not set, only its first
	// character is consumed and returned as is)
	tok := ch
	switch {
	case s.isIdentRune(ch, 0):
		if s.Mode&ScanIdents != 0 {
			tok = Ident
			ch = s.scanIdentifier()
		} else {
			ch = s.next()
		}
	case isDecimal(ch):
		if s.Mode&(ScanInts|ScanFloats) != 0 {
			tok, ch = s.scanNumber(ch, false)
		} else {
			ch = s.next()
		}
	default:
		switch ch {
		case EOF:
			// nothing to consume; stay at EOF
			break
		case '"':
			if s.Mode&ScanStrings != 0 {
				s.scanString('"')
				tok = String
			}
			ch = s.next()
		case '\'':
			if s.Mode&ScanChars != 0 {
				s.scanChar()
				tok = Char
			}
			ch = s.next()
		case '.':
			// a '.' followed by a digit starts a float
			ch = s.next()
			if isDecimal(ch) && s.Mode&ScanFloats != 0 {
				tok, ch = s.scanNumber(ch, true)
			}
		case '/':
			ch = s.next()
			if (ch == '/' || ch == '*') && s.Mode&ScanComments != 0 {
				if s.Mode&SkipComments != 0 {
					// treat the comment like white space and
					// start over with the character after it
					s.tokPos = -1 // don't collect token text
					ch = s.scanComment(ch)
					goto redo
				}
				ch = s.scanComment(ch)
				tok = Comment
			}
		case '`':
			if s.Mode&ScanRawStrings != 0 {
				s.scanRawString()
				tok = RawString
			}
			ch = s.next()
		default:
			ch = s.next()
		}
	}

	// end of token text
	// (ch is the look-ahead character and not part of the token)
	s.tokEnd = s.srcPos - s.lastCharLen

	s.ch = ch
	return tok
}
   744	
   745	// Pos returns the position of the character immediately after
   746	// the character or token returned by the last call to Next or Scan.
   747	// Use the Scanner's Position field for the start position of the most
   748	// recently scanned token.
   749	func (s *Scanner) Pos() (pos Position) {
   750		pos.Filename = s.Filename
   751		pos.Offset = s.srcBufOffset + s.srcPos - s.lastCharLen
   752		switch {
   753		case s.column > 0:
   754			// common case: last character was not a '\n'
   755			pos.Line = s.line
   756			pos.Column = s.column
   757		case s.lastLineLen > 0:
   758			// last character was a '\n'
   759			pos.Line = s.line - 1
   760			pos.Column = s.lastLineLen
   761		default:
   762			// at the beginning of the source
   763			pos.Line = 1
   764			pos.Column = 1
   765		}
   766		return
   767	}
   768	
   769	// TokenText returns the string corresponding to the most recently scanned token.
   770	// Valid after calling Scan and in calls of Scanner.Error.
   771	func (s *Scanner) TokenText() string {
   772		if s.tokPos < 0 {
   773			// no token text
   774			return ""
   775		}
   776	
   777		if s.tokEnd < s.tokPos {
   778			// if EOF was reached, s.tokEnd is set to -1 (s.srcPos == 0)
   779			s.tokEnd = s.tokPos
   780		}
   781		// s.tokEnd >= s.tokPos
   782	
   783		if s.tokBuf.Len() == 0 {
   784			// common case: the entire token text is still in srcBuf
   785			return string(s.srcBuf[s.tokPos:s.tokEnd])
   786		}
   787	
   788		// part of the token text was saved in tokBuf: save the rest in
   789		// tokBuf as well and return its content
   790		s.tokBuf.Write(s.srcBuf[s.tokPos:s.tokEnd])
   791		s.tokPos = s.tokEnd // ensure idempotency of TokenText() call
   792		return s.tokBuf.String()
   793	}
   794
View as plain text