Source file src/pkg/encoding/base64/base64.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package base64 implements base64 encoding as specified by RFC 4648.
     6	package base64
     7	
     8	import (
     9		"encoding/binary"
    10		"io"
    11		"strconv"
    12	)
    13	
    14	/*
    15	 * Encodings
    16	 */
    17	
// An Encoding is a radix 64 encoding/decoding scheme, defined by a
// 64-character alphabet. The most common encoding is the "base64"
// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM
// (RFC 1421).  RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
type Encoding struct {
	encode    [64]byte  // alphabet: encode[v] is the output byte for the 6-bit value v
	decodeMap [256]byte // inverse of encode, indexed by input byte; 0xFF marks bytes outside the alphabet
	padChar   rune      // padding character, or NoPadding when padding is disabled
	strict    bool      // when set, decoding rejects non-zero trailing padding bits
}
    29	
// Padding values understood by an Encoding.
const (
	StdPadding rune = '=' // Standard padding character
	NoPadding  rune = -1  // No padding
)

// encodeStd is the alphabet used to build StdEncoding.
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

// encodeURL is the alphabet used to build URLEncoding; it differs from
// encodeStd only in its last two symbols ('-' and '_' instead of '+' and '/').
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
    37	
    38	// NewEncoding returns a new padded Encoding defined by the given alphabet,
    39	// which must be a 64-byte string that does not contain the padding character
    40	// or CR / LF ('\r', '\n').
    41	// The resulting Encoding uses the default padding character ('='),
    42	// which may be changed or disabled via WithPadding.
    43	func NewEncoding(encoder string) *Encoding {
    44		if len(encoder) != 64 {
    45			panic("encoding alphabet is not 64-bytes long")
    46		}
    47		for i := 0; i < len(encoder); i++ {
    48			if encoder[i] == '\n' || encoder[i] == '\r' {
    49				panic("encoding alphabet contains newline character")
    50			}
    51		}
    52	
    53		e := new(Encoding)
    54		e.padChar = StdPadding
    55		copy(e.encode[:], encoder)
    56	
    57		for i := 0; i < len(e.decodeMap); i++ {
    58			e.decodeMap[i] = 0xFF
    59		}
    60		for i := 0; i < len(encoder); i++ {
    61			e.decodeMap[encoder[i]] = byte(i)
    62		}
    63		return e
    64	}
    65	
    66	// WithPadding creates a new encoding identical to enc except
    67	// with a specified padding character, or NoPadding to disable padding.
    68	// The padding character must not be '\r' or '\n', must not
    69	// be contained in the encoding's alphabet and must be a rune equal or
    70	// below '\xff'.
    71	func (enc Encoding) WithPadding(padding rune) *Encoding {
    72		if padding == '\r' || padding == '\n' || padding > 0xff {
    73			panic("invalid padding")
    74		}
    75	
    76		for i := 0; i < len(enc.encode); i++ {
    77			if rune(enc.encode[i]) == padding {
    78				panic("padding contained in alphabet")
    79			}
    80		}
    81	
    82		enc.padChar = padding
    83		return &enc
    84	}
    85	
    86	// Strict creates a new encoding identical to enc except with
    87	// strict decoding enabled. In this mode, the decoder requires that
    88	// trailing padding bits are zero, as described in RFC 4648 section 3.5.
    89	func (enc Encoding) Strict() *Encoding {
    90		enc.strict = true
    91		return &enc
    92	}
    93	
// StdEncoding is the standard base64 encoding, as defined in
// RFC 4648. It uses the encodeStd alphabet with '=' padding.
var StdEncoding = NewEncoding(encodeStd)

// URLEncoding is the alternate base64 encoding defined in RFC 4648.
// It is typically used in URLs and file names.
// It uses the encodeURL alphabet with '=' padding.
var URLEncoding = NewEncoding(encodeURL)

// RawStdEncoding is the standard raw, unpadded base64 encoding,
// as defined in RFC 4648 section 3.2.
// This is the same as StdEncoding but omits padding characters.
var RawStdEncoding = StdEncoding.WithPadding(NoPadding)

// RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648.
// It is typically used in URLs and file names.
// This is the same as URLEncoding but omits padding characters.
var RawURLEncoding = URLEncoding.WithPadding(NoPadding)
   111	
   112	/*
   113	 * Encoder
   114	 */
   115	
   116	// Encode encodes src using the encoding enc, writing
   117	// EncodedLen(len(src)) bytes to dst.
   118	//
   119	// The encoding pads the output to a multiple of 4 bytes,
   120	// so Encode is not appropriate for use on individual blocks
   121	// of a large data stream. Use NewEncoder() instead.
   122	func (enc *Encoding) Encode(dst, src []byte) {
   123		if len(src) == 0 {
   124			return
   125		}
   126		// enc is a pointer receiver, so the use of enc.encode within the hot
   127		// loop below means a nil check at every operation. Lift that nil check
   128		// outside of the loop to speed up the encoder.
   129		_ = enc.encode
   130	
   131		di, si := 0, 0
   132		n := (len(src) / 3) * 3
   133		for si < n {
   134			// Convert 3x 8bit source bytes into 4 bytes
   135			val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
   136	
   137			dst[di+0] = enc.encode[val>>18&0x3F]
   138			dst[di+1] = enc.encode[val>>12&0x3F]
   139			dst[di+2] = enc.encode[val>>6&0x3F]
   140			dst[di+3] = enc.encode[val&0x3F]
   141	
   142			si += 3
   143			di += 4
   144		}
   145	
   146		remain := len(src) - si
   147		if remain == 0 {
   148			return
   149		}
   150		// Add the remaining small block
   151		val := uint(src[si+0]) << 16
   152		if remain == 2 {
   153			val |= uint(src[si+1]) << 8
   154		}
   155	
   156		dst[di+0] = enc.encode[val>>18&0x3F]
   157		dst[di+1] = enc.encode[val>>12&0x3F]
   158	
   159		switch remain {
   160		case 2:
   161			dst[di+2] = enc.encode[val>>6&0x3F]
   162			if enc.padChar != NoPadding {
   163				dst[di+3] = byte(enc.padChar)
   164			}
   165		case 1:
   166			if enc.padChar != NoPadding {
   167				dst[di+2] = byte(enc.padChar)
   168				dst[di+3] = byte(enc.padChar)
   169			}
   170		}
   171	}
   172	
   173	// EncodeToString returns the base64 encoding of src.
   174	func (enc *Encoding) EncodeToString(src []byte) string {
   175		buf := make([]byte, enc.EncodedLen(len(src)))
   176		enc.Encode(buf, src)
   177		return string(buf)
   178	}
   179	
// encoder is the streaming writer returned by NewEncoder. It encodes
// written data in 3-byte groups and holds back any incomplete group in buf
// until more data arrives or Close is called.
type encoder struct {
	err  error      // first error from w; once set, Write and Close return it
	enc  *Encoding  // encoding applied to written data
	w    io.Writer  // destination for encoded output
	buf  [3]byte    // buffered data waiting to be encoded
	nbuf int        // number of bytes in buf
	out  [1024]byte // output buffer
}
   188	
   189	func (e *encoder) Write(p []byte) (n int, err error) {
   190		if e.err != nil {
   191			return 0, e.err
   192		}
   193	
   194		// Leading fringe.
   195		if e.nbuf > 0 {
   196			var i int
   197			for i = 0; i < len(p) && e.nbuf < 3; i++ {
   198				e.buf[e.nbuf] = p[i]
   199				e.nbuf++
   200			}
   201			n += i
   202			p = p[i:]
   203			if e.nbuf < 3 {
   204				return
   205			}
   206			e.enc.Encode(e.out[:], e.buf[:])
   207			if _, e.err = e.w.Write(e.out[:4]); e.err != nil {
   208				return n, e.err
   209			}
   210			e.nbuf = 0
   211		}
   212	
   213		// Large interior chunks.
   214		for len(p) >= 3 {
   215			nn := len(e.out) / 4 * 3
   216			if nn > len(p) {
   217				nn = len(p)
   218				nn -= nn % 3
   219			}
   220			e.enc.Encode(e.out[:], p[:nn])
   221			if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil {
   222				return n, e.err
   223			}
   224			n += nn
   225			p = p[nn:]
   226		}
   227	
   228		// Trailing fringe.
   229		for i := 0; i < len(p); i++ {
   230			e.buf[i] = p[i]
   231		}
   232		e.nbuf = len(p)
   233		n += len(p)
   234		return
   235	}
   236	
   237	// Close flushes any pending output from the encoder.
   238	// It is an error to call Write after calling Close.
   239	func (e *encoder) Close() error {
   240		// If there's anything left in the buffer, flush it out
   241		if e.err == nil && e.nbuf > 0 {
   242			e.enc.Encode(e.out[:], e.buf[:e.nbuf])
   243			_, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)])
   244			e.nbuf = 0
   245		}
   246		return e.err
   247	}
   248	
   249	// NewEncoder returns a new base64 stream encoder. Data written to
   250	// the returned writer will be encoded using enc and then written to w.
   251	// Base64 encodings operate in 4-byte blocks; when finished
   252	// writing, the caller must Close the returned encoder to flush any
   253	// partially written blocks.
   254	func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser {
   255		return &encoder{enc: enc, w: w}
   256	}
   257	
   258	// EncodedLen returns the length in bytes of the base64 encoding
   259	// of an input buffer of length n.
   260	func (enc *Encoding) EncodedLen(n int) int {
   261		if enc.padChar == NoPadding {
   262			return (n*8 + 5) / 6 // minimum # chars at 6 bits per char
   263		}
   264		return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each
   265	}
   266	
   267	/*
   268	 * Decoder
   269	 */
   270	
   271	type CorruptInputError int64
   272	
   273	func (e CorruptInputError) Error() string {
   274		return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10)
   275	}
   276	
// decodeQuantum decodes up to 4 base64 bytes. The received parameters are
// the destination buffer dst, the source buffer src and an index in the
// source buffer si.
// It returns the updated index into src (one past the last byte it
// consumed), the number of bytes written to dst, and an error, if any.
//
// '\r' and '\n' in src are skipped. A quantum may end early either at the
// end of src (allowed only for unpadded encodings, with at least 2 digits)
// or at a padding character; anything other than newlines after the
// padding is reported as CorruptInputError. When enc.strict is set, the
// unused low bits of a short final quantum must be zero.
func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) {
	// Decode quantum using the base64 alphabet
	var dbuf [4]byte
	dlen := 4 // number of 6-bit digits actually collected in dbuf

	// Lift the nil check outside of the loop.
	_ = enc.decodeMap

	for j := 0; j < len(dbuf); j++ {
		if len(src) == si {
			// Ran out of input in the middle of (or before) a quantum.
			switch {
			case j == 0:
				// Nothing collected yet: a clean end of input.
				return si, 0, nil
			case j == 1, enc.padChar != NoPadding:
				// A single digit cannot encode a byte, and a padded
				// encoding must not stop mid-quantum without padding.
				return si, 0, CorruptInputError(si - j)
			}
			// Unpadded encoding ending with 2 or 3 digits.
			dlen = j
			break
		}
		in := src[si]
		si++

		out := enc.decodeMap[in]
		if out != 0xff {
			// Valid alphabet character: store its 6-bit value.
			dbuf[j] = out
			continue
		}

		if in == '\n' || in == '\r' {
			// Newlines are ignored; undo the loop increment so this
			// digit slot is retried with the next input byte.
			j--
			continue
		}

		if rune(in) != enc.padChar {
			// Neither alphabet, newline nor padding.
			return si, 0, CorruptInputError(si - 1)
		}

		// We've reached the end and there's padding
		switch j {
		case 0, 1:
			// incorrect padding
			return si, 0, CorruptInputError(si - 1)
		case 2:
			// "==" is expected, the first "=" is already consumed.
			// skip over newlines
			for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
				si++
			}
			if si == len(src) {
				// not enough padding
				return si, 0, CorruptInputError(len(src))
			}
			if rune(src[si]) != enc.padChar {
				// incorrect padding
				return si, 0, CorruptInputError(si - 1)
			}

			si++
		}

		// skip over newlines
		for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
			si++
		}
		if si < len(src) {
			// trailing garbage
			// The error is only recorded here; the digits collected so
			// far are still decoded below and returned alongside it.
			err = CorruptInputError(si)
		}
		dlen = j
		break
	}

	// Convert 4x 6bit source bytes into 3 bytes
	val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
	dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
	// Emit dlen-1 bytes, highest index first. Each case zeroes the byte it
	// has written before falling through, so that the strict checks below
	// only see bytes that were NOT emitted, i.e. leftover padding bits.
	switch dlen {
	case 4:
		dst[2] = dbuf[2]
		dbuf[2] = 0
		fallthrough
	case 3:
		dst[1] = dbuf[1]
		if enc.strict && dbuf[2] != 0 {
			return si, 0, CorruptInputError(si - 1)
		}
		dbuf[1] = 0
		fallthrough
	case 2:
		dst[0] = dbuf[0]
		if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
			return si, 0, CorruptInputError(si - 2)
		}
	}

	return si, dlen - 1, err
}
   378	
   379	// DecodeString returns the bytes represented by the base64 string s.
   380	func (enc *Encoding) DecodeString(s string) ([]byte, error) {
   381		dbuf := make([]byte, enc.DecodedLen(len(s)))
   382		n, err := enc.Decode(dbuf, []byte(s))
   383		return dbuf[:n], err
   384	}
   385	
// decoder is the streaming reader returned by NewDecoder. It buffers
// encoded input in buf, decodes it in whole 4-byte quanta, and keeps any
// decoded bytes that did not fit in the caller's buffer in out.
type decoder struct {
	err     error              // sticky error returned by Read once leftover output is drained
	readErr error              // error from r.Read
	enc     *Encoding          // encoding used to decode the input
	r       io.Reader          // source of encoded input
	buf     [1024]byte         // leftover input
	nbuf    int                // number of valid bytes at the start of buf
	out     []byte             // leftover decoded output
	outbuf  [1024 / 4 * 3]byte // backing storage that out slices into
}
   396	
// Read fills p with bytes decoded from the underlying reader and returns
// the number of bytes stored.
//
// Decoded bytes left over from an earlier call are returned first. After
// that, Read gathers at least one 4-byte quantum of input, decodes all
// whole quanta it has buffered, and hands back as much as fits in p; the
// remainder is kept for the next call. Once a decode or read error has
// been recorded it is returned by every later call.
func (d *decoder) Read(p []byte) (n int, err error) {
	// Use leftover decoded output from last read.
	if len(d.out) > 0 {
		n = copy(p, d.out)
		d.out = d.out[n:]
		return n, nil
	}

	if d.err != nil {
		return 0, d.err
	}

	// This code assumes that d.r strips supported whitespace ('\r' and '\n').

	// Refill buffer.
	// Keep reading until a full quantum is buffered or the source fails.
	// The read size is derived from len(p) (4 input bytes per 3 output
	// bytes), but is at least 4 and never exceeds the size of d.buf.
	for d.nbuf < 4 && d.readErr == nil {
		nn := len(p) / 3 * 4
		if nn < 4 {
			nn = 4
		}
		if nn > len(d.buf) {
			nn = len(d.buf)
		}
		nn, d.readErr = d.r.Read(d.buf[d.nbuf:nn])
		d.nbuf += nn
	}

	// The source stopped before a full quantum could be gathered.
	if d.nbuf < 4 {
		if d.enc.padChar == NoPadding && d.nbuf > 0 {
			// Decode final fragment, without padding.
			var nw int
			nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf])
			d.nbuf = 0
			d.out = d.outbuf[:nw]
			n = copy(p, d.out)
			d.out = d.out[n:]
			// Hand out what was decoded now; any error stored in d.err
			// is reported by a later call once d.out is empty.
			if n > 0 || len(p) == 0 && len(d.out) > 0 {
				return n, nil
			}
			if d.err != nil {
				return 0, d.err
			}
		}
		d.err = d.readErr
		// Input ending in the middle of a quantum is an unexpected EOF.
		if d.err == io.EOF && d.nbuf > 0 {
			d.err = io.ErrUnexpectedEOF
		}
		return 0, d.err
	}

	// Decode chunk into p, or d.out and then p if p is too small.
	nr := d.nbuf / 4 * 4 // input bytes forming whole quanta
	nw := d.nbuf / 4 * 3 // upper bound on the bytes they decode to
	if nw > len(p) {
		nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr])
		d.out = d.outbuf[:nw]
		n = copy(p, d.out)
		d.out = d.out[n:]
	} else {
		n, d.err = d.enc.Decode(p, d.buf[:nr])
	}
	// Move the undecoded tail (fewer than 4 bytes) to the front of d.buf.
	d.nbuf -= nr
	copy(d.buf[:d.nbuf], d.buf[nr:])
	return n, d.err
}
   462	
   463	// Decode decodes src using the encoding enc. It writes at most
   464	// DecodedLen(len(src)) bytes to dst and returns the number of bytes
   465	// written. If src contains invalid base64 data, it will return the
   466	// number of bytes successfully written and CorruptInputError.
   467	// New line characters (\r and \n) are ignored.
   468	func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
   469		if len(src) == 0 {
   470			return 0, nil
   471		}
   472	
   473		// Lift the nil check outside of the loop. enc.decodeMap is directly
   474		// used later in this function, to let the compiler know that the
   475		// receiver can't be nil.
   476		_ = enc.decodeMap
   477	
   478		si := 0
   479		for strconv.IntSize >= 64 && len(src)-si >= 8 && len(dst)-n >= 8 {
   480			if dn, ok := assemble64(
   481				enc.decodeMap[src[si+0]],
   482				enc.decodeMap[src[si+1]],
   483				enc.decodeMap[src[si+2]],
   484				enc.decodeMap[src[si+3]],
   485				enc.decodeMap[src[si+4]],
   486				enc.decodeMap[src[si+5]],
   487				enc.decodeMap[src[si+6]],
   488				enc.decodeMap[src[si+7]],
   489			); ok {
   490				binary.BigEndian.PutUint64(dst[n:], dn)
   491				n += 6
   492				si += 8
   493			} else {
   494				var ninc int
   495				si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   496				n += ninc
   497				if err != nil {
   498					return n, err
   499				}
   500			}
   501		}
   502	
   503		for len(src)-si >= 4 && len(dst)-n >= 4 {
   504			if dn, ok := assemble32(
   505				enc.decodeMap[src[si+0]],
   506				enc.decodeMap[src[si+1]],
   507				enc.decodeMap[src[si+2]],
   508				enc.decodeMap[src[si+3]],
   509			); ok {
   510				binary.BigEndian.PutUint32(dst[n:], dn)
   511				n += 3
   512				si += 4
   513			} else {
   514				var ninc int
   515				si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   516				n += ninc
   517				if err != nil {
   518					return n, err
   519				}
   520			}
   521		}
   522	
   523		for si < len(src) {
   524			var ninc int
   525			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   526			n += ninc
   527			if err != nil {
   528				return n, err
   529			}
   530		}
   531		return n, err
   532	}
   533	
   534	// assemble32 assembles 4 base64 digits into 3 bytes.
   535	// Each digit comes from the decode map, and will be 0xff
   536	// if it came from an invalid character.
   537	func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) {
   538		// Check that all the digits are valid. If any of them was 0xff, their
   539		// bitwise OR will be 0xff.
   540		if n1|n2|n3|n4 == 0xff {
   541			return 0, false
   542		}
   543		return uint32(n1)<<26 |
   544				uint32(n2)<<20 |
   545				uint32(n3)<<14 |
   546				uint32(n4)<<8,
   547			true
   548	}
   549	
   550	// assemble64 assembles 8 base64 digits into 6 bytes.
   551	// Each digit comes from the decode map, and will be 0xff
   552	// if it came from an invalid character.
   553	func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) {
   554		// Check that all the digits are valid. If any of them was 0xff, their
   555		// bitwise OR will be 0xff.
   556		if n1|n2|n3|n4|n5|n6|n7|n8 == 0xff {
   557			return 0, false
   558		}
   559		return uint64(n1)<<58 |
   560				uint64(n2)<<52 |
   561				uint64(n3)<<46 |
   562				uint64(n4)<<40 |
   563				uint64(n5)<<34 |
   564				uint64(n6)<<28 |
   565				uint64(n7)<<22 |
   566				uint64(n8)<<16,
   567			true
   568	}
   569	
   570	type newlineFilteringReader struct {
   571		wrapped io.Reader
   572	}
   573	
   574	func (r *newlineFilteringReader) Read(p []byte) (int, error) {
   575		n, err := r.wrapped.Read(p)
   576		for n > 0 {
   577			offset := 0
   578			for i, b := range p[:n] {
   579				if b != '\r' && b != '\n' {
   580					if i != offset {
   581						p[offset] = b
   582					}
   583					offset++
   584				}
   585			}
   586			if offset > 0 {
   587				return offset, err
   588			}
   589			// Previous buffer entirely whitespace, read again
   590			n, err = r.wrapped.Read(p)
   591		}
   592		return n, err
   593	}
   594	
   595	// NewDecoder constructs a new base64 stream decoder.
   596	func NewDecoder(enc *Encoding, r io.Reader) io.Reader {
   597		return &decoder{enc: enc, r: &newlineFilteringReader{r}}
   598	}
   599	
   600	// DecodedLen returns the maximum length in bytes of the decoded data
   601	// corresponding to n bytes of base64-encoded data.
   602	func (enc *Encoding) DecodedLen(n int) int {
   603		if enc.padChar == NoPadding {
   604			// Unpadded data may end with partial block of 2-3 characters.
   605			return n * 6 / 8
   606		}
   607		// Padded base64 should always be a multiple of 4 characters in length.
   608		return n / 4 * 3
   609	}
   610
View as plain text