...

Source file src/encoding/json/stream.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package json
     6	
     7	import (
     8		"bytes"
     9		"errors"
    10		"io"
    11	)
    12	
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	// r is the underlying input; refill reads more data from it.
	r io.Reader
	// buf holds data read from r. Bytes before scanp have already been
	// consumed; bytes from scanp onward are still unread.
	buf []byte
	// d is the decode state that Decode initializes with the bytes of one
	// complete value and then uses to unmarshal it.
	d       decodeState
	scanp   int   // start of unread data in buf
	scanned int64 // amount of data already scanned
	// scan is the scanner readValue steps byte by byte to find the end of
	// the next value.
	scan scanner
	// err is a sticky error: once set by readValue, every later Decode
	// call returns it immediately.
	err error

	// tokenState is the current position in the Token state machine
	// (one of the token* constants).
	tokenState int
	// tokenStack holds the states saved when Token enters an array or
	// object, restored when the matching closing delimiter is read.
	tokenStack []int
}
    26	
    27	// NewDecoder returns a new decoder that reads from r.
    28	//
    29	// The decoder introduces its own buffering and may
    30	// read data from r beyond the JSON values requested.
    31	func NewDecoder(r io.Reader) *Decoder {
    32		return &Decoder{r: r}
    33	}
    34	
    35	// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    36	// Number instead of as a float64.
    37	func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    38	
    39	// DisallowUnknownFields causes the Decoder to return an error when the destination
    40	// is a struct and the input contains object keys which do not match any
    41	// non-ignored, exported fields in the destination.
    42	func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
    43	
    44	// Decode reads the next JSON-encoded value from its
    45	// input and stores it in the value pointed to by v.
    46	//
    47	// See the documentation for Unmarshal for details about
    48	// the conversion of JSON into a Go value.
    49	func (dec *Decoder) Decode(v interface{}) error {
    50		if dec.err != nil {
    51			return dec.err
    52		}
    53	
    54		if err := dec.tokenPrepareForDecode(); err != nil {
    55			return err
    56		}
    57	
    58		if !dec.tokenValueAllowed() {
    59			return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()}
    60		}
    61	
    62		// Read whole value into buffer.
    63		n, err := dec.readValue()
    64		if err != nil {
    65			return err
    66		}
    67		dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    68		dec.scanp += n
    69	
    70		// Don't save err from unmarshal into dec.err:
    71		// the connection is still usable since we read a complete JSON
    72		// object from it before the error happened.
    73		err = dec.d.unmarshal(v)
    74	
    75		// fixup token streaming state
    76		dec.tokenValueEnd()
    77	
    78		return err
    79	}
    80	
    81	// Buffered returns a reader of the data remaining in the Decoder's
    82	// buffer. The reader is valid until the next call to Decode.
    83	func (dec *Decoder) Buffered() io.Reader {
    84		return bytes.NewReader(dec.buf[dec.scanp:])
    85	}
    86	
    87	// readValue reads a JSON value into dec.buf.
    88	// It returns the length of the encoding.
    89	func (dec *Decoder) readValue() (int, error) {
    90		dec.scan.reset()
    91	
    92		scanp := dec.scanp
    93		var err error
    94	Input:
    95		// help the compiler see that scanp is never negative, so it can remove
    96		// some bounds checks below.
    97		for scanp >= 0 {
    98	
    99			// Look in the buffer for a new value.
   100			for ; scanp < len(dec.buf); scanp++ {
   101				c := dec.buf[scanp]
   102				dec.scan.bytes++
   103				switch dec.scan.step(&dec.scan, c) {
   104				case scanEnd:
   105					break Input
   106				case scanEndObject, scanEndArray:
   107					// scanEnd is delayed one byte.
   108					// We might block trying to get that byte from src,
   109					// so instead invent a space byte.
   110					if stateEndValue(&dec.scan, ' ') == scanEnd {
   111						scanp++
   112						break Input
   113					}
   114				case scanError:
   115					dec.err = dec.scan.err
   116					return 0, dec.scan.err
   117				}
   118			}
   119	
   120			// Did the last read have an error?
   121			// Delayed until now to allow buffer scan.
   122			if err != nil {
   123				if err == io.EOF {
   124					if dec.scan.step(&dec.scan, ' ') == scanEnd {
   125						break Input
   126					}
   127					if nonSpace(dec.buf) {
   128						err = io.ErrUnexpectedEOF
   129					}
   130				}
   131				dec.err = err
   132				return 0, err
   133			}
   134	
   135			n := scanp - dec.scanp
   136			err = dec.refill()
   137			scanp = dec.scanp + n
   138		}
   139		return scanp - dec.scanp, nil
   140	}
   141	
   142	func (dec *Decoder) refill() error {
   143		// Make room to read more into the buffer.
   144		// First slide down data already consumed.
   145		if dec.scanp > 0 {
   146			dec.scanned += int64(dec.scanp)
   147			n := copy(dec.buf, dec.buf[dec.scanp:])
   148			dec.buf = dec.buf[:n]
   149			dec.scanp = 0
   150		}
   151	
   152		// Grow buffer if not large enough.
   153		const minRead = 512
   154		if cap(dec.buf)-len(dec.buf) < minRead {
   155			newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   156			copy(newBuf, dec.buf)
   157			dec.buf = newBuf
   158		}
   159	
   160		// Read. Delay error for next iteration (after scan).
   161		n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   162		dec.buf = dec.buf[0 : len(dec.buf)+n]
   163	
   164		return err
   165	}
   166	
   167	func nonSpace(b []byte) bool {
   168		for _, c := range b {
   169			if !isSpace(c) {
   170				return true
   171			}
   172		}
   173		return false
   174	}
   175	
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	// w is the destination that Encode writes each encoded value to.
	w io.Writer
	// err is a sticky write error: once a Write to w fails, every later
	// Encode call returns this error immediately.
	err error
	// escapeHTML is passed to the marshaler as encOpts.escapeHTML;
	// NewEncoder sets it to true and SetEscapeHTML changes it.
	escapeHTML bool

	// indentBuf is a scratch buffer for indented output, allocated on
	// first use by Encode and reused afterwards.
	indentBuf *bytes.Buffer
	// indentPrefix and indentValue are the arguments given to SetIndent;
	// indentation is applied when either is non-empty.
	indentPrefix string
	indentValue  string
}
   186	
   187	// NewEncoder returns a new encoder that writes to w.
   188	func NewEncoder(w io.Writer) *Encoder {
   189		return &Encoder{w: w, escapeHTML: true}
   190	}
   191	
   192	// Encode writes the JSON encoding of v to the stream,
   193	// followed by a newline character.
   194	//
   195	// See the documentation for Marshal for details about the
   196	// conversion of Go values to JSON.
   197	func (enc *Encoder) Encode(v interface{}) error {
   198		if enc.err != nil {
   199			return enc.err
   200		}
   201		e := newEncodeState()
   202		err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   203		if err != nil {
   204			return err
   205		}
   206	
   207		// Terminate each value with a newline.
   208		// This makes the output look a little nicer
   209		// when debugging, and some kind of space
   210		// is required if the encoded value was a number,
   211		// so that the reader knows there aren't more
   212		// digits coming.
   213		e.WriteByte('\n')
   214	
   215		b := e.Bytes()
   216		if enc.indentPrefix != "" || enc.indentValue != "" {
   217			if enc.indentBuf == nil {
   218				enc.indentBuf = new(bytes.Buffer)
   219			}
   220			enc.indentBuf.Reset()
   221			err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
   222			if err != nil {
   223				return err
   224			}
   225			b = enc.indentBuf.Bytes()
   226		}
   227		if _, err = enc.w.Write(b); err != nil {
   228			enc.err = err
   229		}
   230		encodeStatePool.Put(e)
   231		return err
   232	}
   233	
   234	// SetIndent instructs the encoder to format each subsequent encoded
   235	// value as if indented by the package-level function Indent(dst, src, prefix, indent).
   236	// Calling SetIndent("", "") disables indentation.
   237	func (enc *Encoder) SetIndent(prefix, indent string) {
   238		enc.indentPrefix = prefix
   239		enc.indentValue = indent
   240	}
   241	
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
//
// The setting applies to subsequent calls to Encode.
func (enc *Encoder) SetEscapeHTML(on bool) {
	enc.escapeHTML = on
}
   252	
   253	// RawMessage is a raw encoded JSON value.
   254	// It implements Marshaler and Unmarshaler and can
   255	// be used to delay JSON decoding or precompute a JSON encoding.
   256	type RawMessage []byte
   257	
   258	// MarshalJSON returns m as the JSON encoding of m.
   259	func (m RawMessage) MarshalJSON() ([]byte, error) {
   260		if m == nil {
   261			return []byte("null"), nil
   262		}
   263		return m, nil
   264	}
   265	
   266	// UnmarshalJSON sets *m to a copy of data.
   267	func (m *RawMessage) UnmarshalJSON(data []byte) error {
   268		if m == nil {
   269			return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   270		}
   271		*m = append((*m)[0:0], data...)
   272		return nil
   273	}
   274	
// Compile-time checks that *RawMessage satisfies both interfaces.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
   277	
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called on the Decoder)
//	string, for JSON string literals
//	nil, for JSON null
//
type Token interface{}
   288	
// States of the Decoder's token state machine (Decoder.tokenState).
const (
	// tokenTopValue: not inside any array or object opened by Token.
	tokenTopValue = iota
	// tokenArrayStart: just after '['; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: just after ',' in an array; a value must follow.
	tokenArrayValue
	// tokenArrayComma: just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: just after ',' in an object; a key string must follow.
	tokenObjectKey
	// tokenObjectColon: just after an object key; ':' must follow.
	tokenObjectColon
	// tokenObjectValue: just after ':'; a value must follow.
	tokenObjectValue
	// tokenObjectComma: just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
   300	
   301	// advance tokenstate from a separator state to a value state
   302	func (dec *Decoder) tokenPrepareForDecode() error {
   303		// Note: Not calling peek before switch, to avoid
   304		// putting peek into the standard Decode path.
   305		// peek is only called when using the Token API.
   306		switch dec.tokenState {
   307		case tokenArrayComma:
   308			c, err := dec.peek()
   309			if err != nil {
   310				return err
   311			}
   312			if c != ',' {
   313				return &SyntaxError{"expected comma after array element", dec.offset()}
   314			}
   315			dec.scanp++
   316			dec.tokenState = tokenArrayValue
   317		case tokenObjectColon:
   318			c, err := dec.peek()
   319			if err != nil {
   320				return err
   321			}
   322			if c != ':' {
   323				return &SyntaxError{"expected colon after object key", dec.offset()}
   324			}
   325			dec.scanp++
   326			dec.tokenState = tokenObjectValue
   327		}
   328		return nil
   329	}
   330	
   331	func (dec *Decoder) tokenValueAllowed() bool {
   332		switch dec.tokenState {
   333		case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   334			return true
   335		}
   336		return false
   337	}
   338	
   339	func (dec *Decoder) tokenValueEnd() {
   340		switch dec.tokenState {
   341		case tokenArrayStart, tokenArrayValue:
   342			dec.tokenState = tokenArrayComma
   343		case tokenObjectValue:
   344			dec.tokenState = tokenObjectComma
   345		}
   346	}
   347	
   348	// A Delim is a JSON array or object delimiter, one of [ ] { or }.
   349	type Delim rune
   350	
   351	func (d Delim) String() string {
   352		return string(d)
   353	}
   354	
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed silently
	// before the next real token is returned.
	for {
		// peek skips white space and leaves dec.scanp at the byte c.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			// An array may start only where a value is allowed.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Valid only directly after '[' or after an array element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then treat the finished array as a
			// completed value within that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			// An object may start only where a value is allowed.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Valid only directly after '{' or after an object value.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then treat the finished object as a
			// completed value within that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			// A colon is accepted only after an object key; it is consumed
			// without being reported.
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// A comma is accepted only after an array element or an object
			// value; it is consumed without being reported.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// Object key: decode it as a string. The state is switched
				// to tokenTopValue for the duration of the call so that
				// Decode accepts a value here and its end-of-value fixup
				// leaves the state alone; the saved state is put back
				// afterwards.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				// A key must be followed by a colon.
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Not in key position: handle the string as an ordinary value.
			fallthrough

		default:
			// Any other byte must begin a basic value; let Decode parse it
			// and advance the token state.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
   459	
   460	func (dec *Decoder) tokenError(c byte) (Token, error) {
   461		var context string
   462		switch dec.tokenState {
   463		case tokenTopValue:
   464			context = " looking for beginning of value"
   465		case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   466			context = " looking for beginning of value"
   467		case tokenArrayComma:
   468			context = " after array element"
   469		case tokenObjectKey:
   470			context = " looking for beginning of object key string"
   471		case tokenObjectColon:
   472			context = " after object key"
   473		case tokenObjectComma:
   474			context = " after object key:value pair"
   475		}
   476		return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.offset()}
   477	}
   478	
   479	// More reports whether there is another element in the
   480	// current array or object being parsed.
   481	func (dec *Decoder) More() bool {
   482		c, err := dec.peek()
   483		return err == nil && c != ']' && c != '}'
   484	}
   485	
   486	func (dec *Decoder) peek() (byte, error) {
   487		var err error
   488		for {
   489			for i := dec.scanp; i < len(dec.buf); i++ {
   490				c := dec.buf[i]
   491				if isSpace(c) {
   492					continue
   493				}
   494				dec.scanp = i
   495				return c, nil
   496			}
   497			// buffer has been scanned, now report any error
   498			if err != nil {
   499				return 0, err
   500			}
   501			err = dec.refill()
   502		}
   503	}
   504	
   505	func (dec *Decoder) offset() int64 {
   506		return dec.scanned + int64(dec.scanp)
   507	}
   508	

View as plain text