...

Source file src/pkg/mime/quotedprintable/reader.go

     1	// Copyright 2012 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package quotedprintable implements quoted-printable encoding as specified by
     6	// RFC 2045.
     7	package quotedprintable
     8	
     9	import (
    10		"bufio"
    11		"bytes"
    12		"fmt"
    13		"io"
    14	)
    15	
    16	// Reader is a quoted-printable decoder.
    17	type Reader struct {
    18		br   *bufio.Reader
    19		rerr error  // last read error
    20		line []byte // to be consumed before more of br
    21	}
    22	
    23	// NewReader returns a quoted-printable reader, decoding from r.
    24	func NewReader(r io.Reader) *Reader {
    25		return &Reader{
    26			br: bufio.NewReader(r),
    27		}
    28	}
    29	
    30	func fromHex(b byte) (byte, error) {
    31		switch {
    32		case b >= '0' && b <= '9':
    33			return b - '0', nil
    34		case b >= 'A' && b <= 'F':
    35			return b - 'A' + 10, nil
    36		// Accept badly encoded bytes.
    37		case b >= 'a' && b <= 'f':
    38			return b - 'a' + 10, nil
    39		}
    40		return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
    41	}
    42	
    43	func readHexByte(v []byte) (b byte, err error) {
    44		if len(v) < 2 {
    45			return 0, io.ErrUnexpectedEOF
    46		}
    47		var hb, lb byte
    48		if hb, err = fromHex(v[0]); err != nil {
    49			return 0, err
    50		}
    51		if lb, err = fromHex(v[1]); err != nil {
    52			return 0, err
    53		}
    54		return hb<<4 | lb, nil
    55	}
    56	
    57	func isQPDiscardWhitespace(r rune) bool {
    58		switch r {
    59		case '\n', '\r', ' ', '\t':
    60			return true
    61		}
    62		return false
    63	}
    64	
    65	var (
    66		crlf       = []byte("\r\n")
    67		lf         = []byte("\n")
    68		softSuffix = []byte("=")
    69	)
    70	
    71	// Read reads and decodes quoted-printable data from the underlying reader.
    72	func (r *Reader) Read(p []byte) (n int, err error) {
    73		// Deviations from RFC 2045:
    74		// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
    75		// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
    76		//    with other broken QP encoders & decoders.
    77		// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
    78		//    the final byte read from the underlying reader is allowed to be '=',
    79		//    and it will be silently ignored.
    80		// 4. it takes = as literal = if not followed by two hex digits
    81		//    but not at end of line (issue 13219).
    82		for len(p) > 0 {
    83			if len(r.line) == 0 {
    84				if r.rerr != nil {
    85					return n, r.rerr
    86				}
    87				r.line, r.rerr = r.br.ReadSlice('\n')
    88	
    89				// Does the line end in CRLF instead of just LF?
    90				hasLF := bytes.HasSuffix(r.line, lf)
    91				hasCR := bytes.HasSuffix(r.line, crlf)
    92				wholeLine := r.line
    93				r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
    94				if bytes.HasSuffix(r.line, softSuffix) {
    95					rightStripped := wholeLine[len(r.line):]
    96					r.line = r.line[:len(r.line)-1]
    97					if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
    98						!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
    99						r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
   100					}
   101				} else if hasLF {
   102					if hasCR {
   103						r.line = append(r.line, '\r', '\n')
   104					} else {
   105						r.line = append(r.line, '\n')
   106					}
   107				}
   108				continue
   109			}
   110			b := r.line[0]
   111	
   112			switch {
   113			case b == '=':
   114				b, err = readHexByte(r.line[1:])
   115				if err != nil {
   116					if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
   117						// Take the = as a literal =.
   118						b = '='
   119						break
   120					}
   121					return n, err
   122				}
   123				r.line = r.line[2:] // 2 of the 3; other 1 is done below
   124			case b == '\t' || b == '\r' || b == '\n':
   125				break
   126			case b >= 0x80:
   127				// As an extension to RFC 2045, we accept
   128				// values >= 0x80 without complaint. Issue 22597.
   129				break
   130			case b < ' ' || b > '~':
   131				return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
   132			}
   133			p[0] = b
   134			p = p[1:]
   135			r.line = r.line[1:]
   136			n++
   137		}
   138		return n, nil
   139	}
   140	

View as plain text