Source file src/encoding/json/scanner.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package json
     6	
     7	// JSON value parser state machine.
     8	// Just about at the limit of what is reasonable to write by hand.
     9	// Some parts are a bit tedious, but overall it nicely factors out the
    10	// otherwise common code from the multiple scanning functions
    11	// in this package (Compact, Indent, checkValid, etc).
    12	//
    13	// This file starts with two simple examples using the scanner
    14	// before diving into the scanner itself.
    15	
    16	import "strconv"
    17	
    18	// Valid reports whether data is a valid JSON encoding.
    19	func Valid(data []byte) bool {
    20		return checkValid(data, &scanner{}) == nil
    21	}
    22	
    23	// checkValid verifies that data is valid JSON-encoded data.
    24	// scan is passed in for use by checkValid to avoid an allocation.
    25	func checkValid(data []byte, scan *scanner) error {
    26		scan.reset()
    27		for _, c := range data {
    28			scan.bytes++
    29			if scan.step(scan, c) == scanError {
    30				return scan.err
    31			}
    32		}
    33		if scan.eof() == scanError {
    34			return scan.err
    35		}
    36		return nil
    37	}
    38	
    39	// A SyntaxError is a description of a JSON syntax error.
    40	type SyntaxError struct {
    41		msg    string // description of error
    42		Offset int64  // error occurred after reading Offset bytes
    43	}
    44	
    45	func (e *SyntaxError) Error() string { return e.msg }
    46	
// A scanner is a JSON scanning state machine.
// Callers call scan.reset() and then pass bytes in one at a time
// by calling scan.step(&scan, c) for each byte.
// The return value, referred to as an opcode, tells the
// caller about significant parsing events like beginning
// and ending literals, objects, and arrays, so that the
// caller can follow along if it wishes.
// The return value scanEnd indicates that a single top-level
// JSON value has been completed, *before* the byte that
// just got passed in.  (The indication must be delayed in order
// to recognize the end of numbers: is 123 a whole value or
// the beginning of 12345e+6?).
type scanner struct {
	// The step is a func to be called to execute the next transition.
	// Also tried using an integer constant and a single func
	// with a switch, but using the func directly was 10% faster
	// on a 64-bit Mac Mini, and it's nicer to read.
	step func(*scanner, byte) int

	// Reached end of top-level value.
	// Set when the parse stack becomes empty after a value completes,
	// and cleared again by reset.
	endTop bool

	// Stack of what we're in the middle of - array values, object keys, object values.
	// Entries are the parseObjectKey / parseObjectValue / parseArrayValue
	// constants; the innermost composite value is the last element.
	parseState []int

	// Error that happened, if any.
	// Holds a *SyntaxError once the scanner has entered the error state.
	err error

	// total bytes consumed, updated by decoder.Decode
	// (and, within this file, incremented by checkValid for every byte fed).
	// It is copied into SyntaxError.Offset when an error is recorded and
	// is not set to zero by reset.
	bytes int64
}
    78	
// These values are returned by the state transition functions
// assigned to scanner.step and the method scanner.eof.
// They give details about the current state of the scan that
// callers might be interested to know about.
// It is okay to ignore the return value of any particular
// call to scanner.step: if one call returns scanError,
// every subsequent call will return scanError too.
//
// The declaration order matters: every "continue" opcode is numerically
// smaller than scanEnd, and scanSkipSpace is the largest of them.
const (
	// Continue.
	scanContinue     = iota // uninteresting byte
	scanBeginLiteral        // end implied by next result != scanContinue
	scanBeginObject         // begin object
	scanObjectKey           // just finished object key (string)
	scanObjectValue         // just finished non-last object value
	scanEndObject           // end object (implies scanObjectValue if possible)
	scanBeginArray          // begin array
	scanArrayValue          // just finished array value
	scanEndArray            // end array (implies scanArrayValue if possible)
	scanSkipSpace           // space byte; can skip; known to be last "continue" result

	// Stop.
	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
	scanError // hit an error, scanner.err.
)
   103	
// These values are stored in the parseState stack.
// They give the current state of a composite value
// being scanned. If the parser is inside a nested value
// the parseState describes the nested state, outermost at entry 0.
//
// stateBeginValue pushes parseObjectKey on `{` and parseArrayValue on `[`;
// stateEndValue flips an object's entry between parseObjectKey and
// parseObjectValue as `:` and `,` are consumed.
const (
	parseObjectKey   = iota // parsing object key (before colon)
	parseObjectValue        // parsing object value (after colon)
	parseArrayValue         // parsing array value
)
   113	
   114	// reset prepares the scanner for use.
   115	// It must be called before calling s.step.
   116	func (s *scanner) reset() {
   117		s.step = stateBeginValue
   118		s.parseState = s.parseState[0:0]
   119		s.err = nil
   120		s.endTop = false
   121	}
   122	
   123	// eof tells the scanner that the end of input has been reached.
   124	// It returns a scan status just as s.step does.
   125	func (s *scanner) eof() int {
   126		if s.err != nil {
   127			return scanError
   128		}
   129		if s.endTop {
   130			return scanEnd
   131		}
   132		s.step(s, ' ')
   133		if s.endTop {
   134			return scanEnd
   135		}
   136		if s.err == nil {
   137			s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
   138		}
   139		return scanError
   140	}
   141	
   142	// pushParseState pushes a new parse state p onto the parse stack.
   143	func (s *scanner) pushParseState(p int) {
   144		s.parseState = append(s.parseState, p)
   145	}
   146	
   147	// popParseState pops a parse state (already obtained) off the stack
   148	// and updates s.step accordingly.
   149	func (s *scanner) popParseState() {
   150		n := len(s.parseState) - 1
   151		s.parseState = s.parseState[0:n]
   152		if n == 0 {
   153			s.step = stateEndTop
   154			s.endTop = true
   155		} else {
   156			s.step = stateEndValue
   157		}
   158	}
   159	
   160	func isSpace(c byte) bool {
   161		return c == ' ' || c == '\t' || c == '\r' || c == '\n'
   162	}
   163	
   164	// stateBeginValueOrEmpty is the state after reading `[`.
   165	func stateBeginValueOrEmpty(s *scanner, c byte) int {
   166		if c <= ' ' && isSpace(c) {
   167			return scanSkipSpace
   168		}
   169		if c == ']' {
   170			return stateEndValue(s, c)
   171		}
   172		return stateBeginValue(s, c)
   173	}
   174	
   175	// stateBeginValue is the state at the beginning of the input.
   176	func stateBeginValue(s *scanner, c byte) int {
   177		if c <= ' ' && isSpace(c) {
   178			return scanSkipSpace
   179		}
   180		switch c {
   181		case '{':
   182			s.step = stateBeginStringOrEmpty
   183			s.pushParseState(parseObjectKey)
   184			return scanBeginObject
   185		case '[':
   186			s.step = stateBeginValueOrEmpty
   187			s.pushParseState(parseArrayValue)
   188			return scanBeginArray
   189		case '"':
   190			s.step = stateInString
   191			return scanBeginLiteral
   192		case '-':
   193			s.step = stateNeg
   194			return scanBeginLiteral
   195		case '0': // beginning of 0.123
   196			s.step = state0
   197			return scanBeginLiteral
   198		case 't': // beginning of true
   199			s.step = stateT
   200			return scanBeginLiteral
   201		case 'f': // beginning of false
   202			s.step = stateF
   203			return scanBeginLiteral
   204		case 'n': // beginning of null
   205			s.step = stateN
   206			return scanBeginLiteral
   207		}
   208		if '1' <= c && c <= '9' { // beginning of 1234.5
   209			s.step = state1
   210			return scanBeginLiteral
   211		}
   212		return s.error(c, "looking for beginning of value")
   213	}
   214	
   215	// stateBeginStringOrEmpty is the state after reading `{`.
   216	func stateBeginStringOrEmpty(s *scanner, c byte) int {
   217		if c <= ' ' && isSpace(c) {
   218			return scanSkipSpace
   219		}
   220		if c == '}' {
   221			n := len(s.parseState)
   222			s.parseState[n-1] = parseObjectValue
   223			return stateEndValue(s, c)
   224		}
   225		return stateBeginString(s, c)
   226	}
   227	
   228	// stateBeginString is the state after reading `{"key": value,`.
   229	func stateBeginString(s *scanner, c byte) int {
   230		if c <= ' ' && isSpace(c) {
   231			return scanSkipSpace
   232		}
   233		if c == '"' {
   234			s.step = stateInString
   235			return scanBeginLiteral
   236		}
   237		return s.error(c, "looking for beginning of object key string")
   238	}
   239	
   240	// stateEndValue is the state after completing a value,
   241	// such as after reading `{}` or `true` or `["x"`.
   242	func stateEndValue(s *scanner, c byte) int {
   243		n := len(s.parseState)
   244		if n == 0 {
   245			// Completed top-level before the current byte.
   246			s.step = stateEndTop
   247			s.endTop = true
   248			return stateEndTop(s, c)
   249		}
   250		if c <= ' ' && isSpace(c) {
   251			s.step = stateEndValue
   252			return scanSkipSpace
   253		}
   254		ps := s.parseState[n-1]
   255		switch ps {
   256		case parseObjectKey:
   257			if c == ':' {
   258				s.parseState[n-1] = parseObjectValue
   259				s.step = stateBeginValue
   260				return scanObjectKey
   261			}
   262			return s.error(c, "after object key")
   263		case parseObjectValue:
   264			if c == ',' {
   265				s.parseState[n-1] = parseObjectKey
   266				s.step = stateBeginString
   267				return scanObjectValue
   268			}
   269			if c == '}' {
   270				s.popParseState()
   271				return scanEndObject
   272			}
   273			return s.error(c, "after object key:value pair")
   274		case parseArrayValue:
   275			if c == ',' {
   276				s.step = stateBeginValue
   277				return scanArrayValue
   278			}
   279			if c == ']' {
   280				s.popParseState()
   281				return scanEndArray
   282			}
   283			return s.error(c, "after array element")
   284		}
   285		return s.error(c, "")
   286	}
   287	
   288	// stateEndTop is the state after finishing the top-level value,
   289	// such as after reading `{}` or `[1,2,3]`.
   290	// Only space characters should be seen now.
   291	func stateEndTop(s *scanner, c byte) int {
   292		if !isSpace(c) {
   293			// Complain about non-space byte on next call.
   294			s.error(c, "after top-level value")
   295		}
   296		return scanEnd
   297	}
   298	
   299	// stateInString is the state after reading `"`.
   300	func stateInString(s *scanner, c byte) int {
   301		if c == '"' {
   302			s.step = stateEndValue
   303			return scanContinue
   304		}
   305		if c == '\\' {
   306			s.step = stateInStringEsc
   307			return scanContinue
   308		}
   309		if c < 0x20 {
   310			return s.error(c, "in string literal")
   311		}
   312		return scanContinue
   313	}
   314	
   315	// stateInStringEsc is the state after reading `"\` during a quoted string.
   316	func stateInStringEsc(s *scanner, c byte) int {
   317		switch c {
   318		case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
   319			s.step = stateInString
   320			return scanContinue
   321		case 'u':
   322			s.step = stateInStringEscU
   323			return scanContinue
   324		}
   325		return s.error(c, "in string escape code")
   326	}
   327	
   328	// stateInStringEscU is the state after reading `"\u` during a quoted string.
   329	func stateInStringEscU(s *scanner, c byte) int {
   330		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   331			s.step = stateInStringEscU1
   332			return scanContinue
   333		}
   334		// numbers
   335		return s.error(c, "in \\u hexadecimal character escape")
   336	}
   337	
   338	// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
   339	func stateInStringEscU1(s *scanner, c byte) int {
   340		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   341			s.step = stateInStringEscU12
   342			return scanContinue
   343		}
   344		// numbers
   345		return s.error(c, "in \\u hexadecimal character escape")
   346	}
   347	
   348	// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
   349	func stateInStringEscU12(s *scanner, c byte) int {
   350		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   351			s.step = stateInStringEscU123
   352			return scanContinue
   353		}
   354		// numbers
   355		return s.error(c, "in \\u hexadecimal character escape")
   356	}
   357	
   358	// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
   359	func stateInStringEscU123(s *scanner, c byte) int {
   360		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   361			s.step = stateInString
   362			return scanContinue
   363		}
   364		// numbers
   365		return s.error(c, "in \\u hexadecimal character escape")
   366	}
   367	
   368	// stateNeg is the state after reading `-` during a number.
   369	func stateNeg(s *scanner, c byte) int {
   370		if c == '0' {
   371			s.step = state0
   372			return scanContinue
   373		}
   374		if '1' <= c && c <= '9' {
   375			s.step = state1
   376			return scanContinue
   377		}
   378		return s.error(c, "in numeric literal")
   379	}
   380	
   381	// state1 is the state after reading a non-zero integer during a number,
   382	// such as after reading `1` or `100` but not `0`.
   383	func state1(s *scanner, c byte) int {
   384		if '0' <= c && c <= '9' {
   385			s.step = state1
   386			return scanContinue
   387		}
   388		return state0(s, c)
   389	}
   390	
   391	// state0 is the state after reading `0` during a number.
   392	func state0(s *scanner, c byte) int {
   393		if c == '.' {
   394			s.step = stateDot
   395			return scanContinue
   396		}
   397		if c == 'e' || c == 'E' {
   398			s.step = stateE
   399			return scanContinue
   400		}
   401		return stateEndValue(s, c)
   402	}
   403	
   404	// stateDot is the state after reading the integer and decimal point in a number,
   405	// such as after reading `1.`.
   406	func stateDot(s *scanner, c byte) int {
   407		if '0' <= c && c <= '9' {
   408			s.step = stateDot0
   409			return scanContinue
   410		}
   411		return s.error(c, "after decimal point in numeric literal")
   412	}
   413	
   414	// stateDot0 is the state after reading the integer, decimal point, and subsequent
   415	// digits of a number, such as after reading `3.14`.
   416	func stateDot0(s *scanner, c byte) int {
   417		if '0' <= c && c <= '9' {
   418			return scanContinue
   419		}
   420		if c == 'e' || c == 'E' {
   421			s.step = stateE
   422			return scanContinue
   423		}
   424		return stateEndValue(s, c)
   425	}
   426	
   427	// stateE is the state after reading the mantissa and e in a number,
   428	// such as after reading `314e` or `0.314e`.
   429	func stateE(s *scanner, c byte) int {
   430		if c == '+' || c == '-' {
   431			s.step = stateESign
   432			return scanContinue
   433		}
   434		return stateESign(s, c)
   435	}
   436	
   437	// stateESign is the state after reading the mantissa, e, and sign in a number,
   438	// such as after reading `314e-` or `0.314e+`.
   439	func stateESign(s *scanner, c byte) int {
   440		if '0' <= c && c <= '9' {
   441			s.step = stateE0
   442			return scanContinue
   443		}
   444		return s.error(c, "in exponent of numeric literal")
   445	}
   446	
   447	// stateE0 is the state after reading the mantissa, e, optional sign,
   448	// and at least one digit of the exponent in a number,
   449	// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
   450	func stateE0(s *scanner, c byte) int {
   451		if '0' <= c && c <= '9' {
   452			return scanContinue
   453		}
   454		return stateEndValue(s, c)
   455	}
   456	
   457	// stateT is the state after reading `t`.
   458	func stateT(s *scanner, c byte) int {
   459		if c == 'r' {
   460			s.step = stateTr
   461			return scanContinue
   462		}
   463		return s.error(c, "in literal true (expecting 'r')")
   464	}
   465	
   466	// stateTr is the state after reading `tr`.
   467	func stateTr(s *scanner, c byte) int {
   468		if c == 'u' {
   469			s.step = stateTru
   470			return scanContinue
   471		}
   472		return s.error(c, "in literal true (expecting 'u')")
   473	}
   474	
   475	// stateTru is the state after reading `tru`.
   476	func stateTru(s *scanner, c byte) int {
   477		if c == 'e' {
   478			s.step = stateEndValue
   479			return scanContinue
   480		}
   481		return s.error(c, "in literal true (expecting 'e')")
   482	}
   483	
   484	// stateF is the state after reading `f`.
   485	func stateF(s *scanner, c byte) int {
   486		if c == 'a' {
   487			s.step = stateFa
   488			return scanContinue
   489		}
   490		return s.error(c, "in literal false (expecting 'a')")
   491	}
   492	
   493	// stateFa is the state after reading `fa`.
   494	func stateFa(s *scanner, c byte) int {
   495		if c == 'l' {
   496			s.step = stateFal
   497			return scanContinue
   498		}
   499		return s.error(c, "in literal false (expecting 'l')")
   500	}
   501	
   502	// stateFal is the state after reading `fal`.
   503	func stateFal(s *scanner, c byte) int {
   504		if c == 's' {
   505			s.step = stateFals
   506			return scanContinue
   507		}
   508		return s.error(c, "in literal false (expecting 's')")
   509	}
   510	
   511	// stateFals is the state after reading `fals`.
   512	func stateFals(s *scanner, c byte) int {
   513		if c == 'e' {
   514			s.step = stateEndValue
   515			return scanContinue
   516		}
   517		return s.error(c, "in literal false (expecting 'e')")
   518	}
   519	
   520	// stateN is the state after reading `n`.
   521	func stateN(s *scanner, c byte) int {
   522		if c == 'u' {
   523			s.step = stateNu
   524			return scanContinue
   525		}
   526		return s.error(c, "in literal null (expecting 'u')")
   527	}
   528	
   529	// stateNu is the state after reading `nu`.
   530	func stateNu(s *scanner, c byte) int {
   531		if c == 'l' {
   532			s.step = stateNul
   533			return scanContinue
   534		}
   535		return s.error(c, "in literal null (expecting 'l')")
   536	}
   537	
   538	// stateNul is the state after reading `nul`.
   539	func stateNul(s *scanner, c byte) int {
   540		if c == 'l' {
   541			s.step = stateEndValue
   542			return scanContinue
   543		}
   544		return s.error(c, "in literal null (expecting 'l')")
   545	}
   546	
   547	// stateError is the state after reaching a syntax error,
   548	// such as after reading `[1}` or `5.1.2`.
   549	func stateError(s *scanner, c byte) int {
   550		return scanError
   551	}
   552	
   553	// error records an error and switches to the error state.
   554	func (s *scanner) error(c byte, context string) int {
   555		s.step = stateError
   556		s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
   557		return scanError
   558	}
   559	
   560	// quoteChar formats c as a quoted character literal
   561	func quoteChar(c byte) string {
   562		// special cases - different from quoted strings
   563		if c == '\'' {
   564			return `'\''`
   565		}
   566		if c == '"' {
   567			return `'"'`
   568		}
   569	
   570		// use quoted string with different quotation marks
   571		s := strconv.Quote(string(c))
   572		return "'" + s[1:len(s)-1] + "'"
   573	}
   574
View as plain text