...

Source file src/archive/zip/writer.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package zip
     6	
     7	import (
     8		"bufio"
     9		"encoding/binary"
    10		"errors"
    11		"hash"
    12		"hash/crc32"
    13		"io"
    14		"strings"
    15		"unicode/utf8"
    16	)
    17	
    18	var (
    19		errLongName  = errors.New("zip: FileHeader.Name too long")
    20		errLongExtra = errors.New("zip: FileHeader.Extra too long")
    21	)
    22	
    23	// Writer implements a zip file writer.
    24	type Writer struct {
    25		cw          *countWriter
    26		dir         []*header
    27		last        *fileWriter
    28		closed      bool
    29		compressors map[uint16]Compressor
    30		comment     string
    31	
    32		// testHookCloseSizeOffset if non-nil is called with the size
    33		// of offset of the central directory at Close.
    34		testHookCloseSizeOffset func(size, offset uint64)
    35	}
    36	
    37	type header struct {
    38		*FileHeader
    39		offset uint64
    40	}
    41	
    42	// NewWriter returns a new Writer writing a zip file to w.
    43	func NewWriter(w io.Writer) *Writer {
    44		return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
    45	}
    46	
    47	// SetOffset sets the offset of the beginning of the zip data within the
    48	// underlying writer. It should be used when the zip data is appended to an
    49	// existing file, such as a binary executable.
    50	// It must be called before any data is written.
    51	func (w *Writer) SetOffset(n int64) {
    52		if w.cw.count != 0 {
    53			panic("zip: SetOffset called after data was written")
    54		}
    55		w.cw.count = n
    56	}
    57	
    58	// Flush flushes any buffered data to the underlying writer.
    59	// Calling Flush is not normally necessary; calling Close is sufficient.
    60	func (w *Writer) Flush() error {
    61		return w.cw.w.(*bufio.Writer).Flush()
    62	}
    63	
    64	// SetComment sets the end-of-central-directory comment field.
    65	// It can only be called before Close.
    66	func (w *Writer) SetComment(comment string) error {
    67		if len(comment) > uint16max {
    68			return errors.New("zip: Writer.Comment too long")
    69		}
    70		w.comment = comment
    71		return nil
    72	}
    73	
    74	// Close finishes writing the zip file by writing the central directory.
    75	// It does not close the underlying writer.
    76	func (w *Writer) Close() error {
    77		if w.last != nil && !w.last.closed {
    78			if err := w.last.close(); err != nil {
    79				return err
    80			}
    81			w.last = nil
    82		}
    83		if w.closed {
    84			return errors.New("zip: writer closed twice")
    85		}
    86		w.closed = true
    87	
    88		// write central directory
    89		start := w.cw.count
    90		for _, h := range w.dir {
    91			var buf [directoryHeaderLen]byte
    92			b := writeBuf(buf[:])
    93			b.uint32(uint32(directoryHeaderSignature))
    94			b.uint16(h.CreatorVersion)
    95			b.uint16(h.ReaderVersion)
    96			b.uint16(h.Flags)
    97			b.uint16(h.Method)
    98			b.uint16(h.ModifiedTime)
    99			b.uint16(h.ModifiedDate)
   100			b.uint32(h.CRC32)
   101			if h.isZip64() || h.offset >= uint32max {
   102				// the file needs a zip64 header. store maxint in both
   103				// 32 bit size fields (and offset later) to signal that the
   104				// zip64 extra header should be used.
   105				b.uint32(uint32max) // compressed size
   106				b.uint32(uint32max) // uncompressed size
   107	
   108				// append a zip64 extra block to Extra
   109				var buf [28]byte // 2x uint16 + 3x uint64
   110				eb := writeBuf(buf[:])
   111				eb.uint16(zip64ExtraID)
   112				eb.uint16(24) // size = 3x uint64
   113				eb.uint64(h.UncompressedSize64)
   114				eb.uint64(h.CompressedSize64)
   115				eb.uint64(h.offset)
   116				h.Extra = append(h.Extra, buf[:]...)
   117			} else {
   118				b.uint32(h.CompressedSize)
   119				b.uint32(h.UncompressedSize)
   120			}
   121	
   122			b.uint16(uint16(len(h.Name)))
   123			b.uint16(uint16(len(h.Extra)))
   124			b.uint16(uint16(len(h.Comment)))
   125			b = b[4:] // skip disk number start and internal file attr (2x uint16)
   126			b.uint32(h.ExternalAttrs)
   127			if h.offset > uint32max {
   128				b.uint32(uint32max)
   129			} else {
   130				b.uint32(uint32(h.offset))
   131			}
   132			if _, err := w.cw.Write(buf[:]); err != nil {
   133				return err
   134			}
   135			if _, err := io.WriteString(w.cw, h.Name); err != nil {
   136				return err
   137			}
   138			if _, err := w.cw.Write(h.Extra); err != nil {
   139				return err
   140			}
   141			if _, err := io.WriteString(w.cw, h.Comment); err != nil {
   142				return err
   143			}
   144		}
   145		end := w.cw.count
   146	
   147		records := uint64(len(w.dir))
   148		size := uint64(end - start)
   149		offset := uint64(start)
   150	
   151		if f := w.testHookCloseSizeOffset; f != nil {
   152			f(size, offset)
   153		}
   154	
   155		if records >= uint16max || size >= uint32max || offset >= uint32max {
   156			var buf [directory64EndLen + directory64LocLen]byte
   157			b := writeBuf(buf[:])
   158	
   159			// zip64 end of central directory record
   160			b.uint32(directory64EndSignature)
   161			b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
   162			b.uint16(zipVersion45)           // version made by
   163			b.uint16(zipVersion45)           // version needed to extract
   164			b.uint32(0)                      // number of this disk
   165			b.uint32(0)                      // number of the disk with the start of the central directory
   166			b.uint64(records)                // total number of entries in the central directory on this disk
   167			b.uint64(records)                // total number of entries in the central directory
   168			b.uint64(size)                   // size of the central directory
   169			b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number
   170	
   171			// zip64 end of central directory locator
   172			b.uint32(directory64LocSignature)
   173			b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
   174			b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
   175			b.uint32(1)           // total number of disks
   176	
   177			if _, err := w.cw.Write(buf[:]); err != nil {
   178				return err
   179			}
   180	
   181			// store max values in the regular end record to signal
   182			// that the zip64 values should be used instead
   183			records = uint16max
   184			size = uint32max
   185			offset = uint32max
   186		}
   187	
   188		// write end record
   189		var buf [directoryEndLen]byte
   190		b := writeBuf(buf[:])
   191		b.uint32(uint32(directoryEndSignature))
   192		b = b[4:]                        // skip over disk number and first disk number (2x uint16)
   193		b.uint16(uint16(records))        // number of entries this disk
   194		b.uint16(uint16(records))        // number of entries total
   195		b.uint32(uint32(size))           // size of directory
   196		b.uint32(uint32(offset))         // start of directory
   197		b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
   198		if _, err := w.cw.Write(buf[:]); err != nil {
   199			return err
   200		}
   201		if _, err := io.WriteString(w.cw, w.comment); err != nil {
   202			return err
   203		}
   204	
   205		return w.cw.w.(*bufio.Writer).Flush()
   206	}
   207	
   208	// Create adds a file to the zip file using the provided name.
   209	// It returns a Writer to which the file contents should be written.
   210	// The file contents will be compressed using the Deflate method.
   211	// The name must be a relative path: it must not start with a drive
   212	// letter (e.g. C:) or leading slash, and only forward slashes are
   213	// allowed. To create a directory instead of a file, add a trailing
   214	// slash to the name.
   215	// The file's contents must be written to the io.Writer before the next
   216	// call to Create, CreateHeader, or Close.
   217	func (w *Writer) Create(name string) (io.Writer, error) {
   218		header := &FileHeader{
   219			Name:   name,
   220			Method: Deflate,
   221		}
   222		return w.CreateHeader(header)
   223	}
   224	
   225	// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
   226	// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
   227	// or any other common encoding).
   228	func detectUTF8(s string) (valid, require bool) {
   229		for i := 0; i < len(s); {
   230			r, size := utf8.DecodeRuneInString(s[i:])
   231			i += size
   232			// Officially, ZIP uses CP-437, but many readers use the system's
   233			// local character encoding. Most encoding are compatible with a large
   234			// subset of CP-437, which itself is ASCII-like.
   235			//
   236			// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
   237			// characters with localized currency and overline characters.
   238			if r < 0x20 || r > 0x7d || r == 0x5c {
   239				if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
   240					return false, false
   241				}
   242				require = true
   243			}
   244		}
   245		return true, require
   246	}
   247	
   248	// CreateHeader adds a file to the zip archive using the provided FileHeader
   249	// for the file metadata. Writer takes ownership of fh and may mutate
   250	// its fields. The caller must not modify fh after calling CreateHeader.
   251	//
   252	// This returns a Writer to which the file contents should be written.
   253	// The file's contents must be written to the io.Writer before the next
   254	// call to Create, CreateHeader, or Close.
   255	func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
   256		if w.last != nil && !w.last.closed {
   257			if err := w.last.close(); err != nil {
   258				return nil, err
   259			}
   260		}
   261		if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
   262			// See https://golang.org/issue/11144 confusion.
   263			return nil, errors.New("archive/zip: invalid duplicate FileHeader")
   264		}
   265	
   266		// The ZIP format has a sad state of affairs regarding character encoding.
   267		// Officially, the name and comment fields are supposed to be encoded
   268		// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
   269		// flag bit is set. However, there are several problems:
   270		//
   271		//	* Many ZIP readers still do not support UTF-8.
   272		//	* If the UTF-8 flag is cleared, several readers simply interpret the
   273		//	name and comment fields as whatever the local system encoding is.
   274		//
   275		// In order to avoid breaking readers without UTF-8 support,
   276		// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
   277		// However, if the strings require multibyte UTF-8 encoding and is a
   278		// valid UTF-8 string, then we set the UTF-8 bit.
   279		//
   280		// For the case, where the user explicitly wants to specify the encoding
   281		// as UTF-8, they will need to set the flag bit themselves.
   282		utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
   283		utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
   284		switch {
   285		case fh.NonUTF8:
   286			fh.Flags &^= 0x800
   287		case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
   288			fh.Flags |= 0x800
   289		}
   290	
   291		fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
   292		fh.ReaderVersion = zipVersion20
   293	
   294		// If Modified is set, this takes precedence over MS-DOS timestamp fields.
   295		if !fh.Modified.IsZero() {
   296			// Contrary to the FileHeader.SetModTime method, we intentionally
   297			// do not convert to UTC, because we assume the user intends to encode
   298			// the date using the specified timezone. A user may want this control
   299			// because many legacy ZIP readers interpret the timestamp according
   300			// to the local timezone.
   301			//
   302			// The timezone is only non-UTC if a user directly sets the Modified
   303			// field directly themselves. All other approaches sets UTC.
   304			fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
   305	
   306			// Use "extended timestamp" format since this is what Info-ZIP uses.
   307			// Nearly every major ZIP implementation uses a different format,
   308			// but at least most seem to be able to understand the other formats.
   309			//
   310			// This format happens to be identical for both local and central header
   311			// if modification time is the only timestamp being encoded.
   312			var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
   313			mt := uint32(fh.Modified.Unix())
   314			eb := writeBuf(mbuf[:])
   315			eb.uint16(extTimeExtraID)
   316			eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
   317			eb.uint8(1)   // Flags: ModTime
   318			eb.uint32(mt) // ModTime
   319			fh.Extra = append(fh.Extra, mbuf[:]...)
   320		}
   321	
   322		var (
   323			ow io.Writer
   324			fw *fileWriter
   325		)
   326		h := &header{
   327			FileHeader: fh,
   328			offset:     uint64(w.cw.count),
   329		}
   330	
   331		if strings.HasSuffix(fh.Name, "/") {
   332			// Set the compression method to Store to ensure data length is truly zero,
   333			// which the writeHeader method always encodes for the size fields.
   334			// This is necessary as most compression formats have non-zero lengths
   335			// even when compressing an empty string.
   336			fh.Method = Store
   337			fh.Flags &^= 0x8 // we will not write a data descriptor
   338	
   339			// Explicitly clear sizes as they have no meaning for directories.
   340			fh.CompressedSize = 0
   341			fh.CompressedSize64 = 0
   342			fh.UncompressedSize = 0
   343			fh.UncompressedSize64 = 0
   344	
   345			ow = dirWriter{}
   346		} else {
   347			fh.Flags |= 0x8 // we will write a data descriptor
   348	
   349			fw = &fileWriter{
   350				zipw:      w.cw,
   351				compCount: &countWriter{w: w.cw},
   352				crc32:     crc32.NewIEEE(),
   353			}
   354			comp := w.compressor(fh.Method)
   355			if comp == nil {
   356				return nil, ErrAlgorithm
   357			}
   358			var err error
   359			fw.comp, err = comp(fw.compCount)
   360			if err != nil {
   361				return nil, err
   362			}
   363			fw.rawCount = &countWriter{w: fw.comp}
   364			fw.header = h
   365			ow = fw
   366		}
   367		w.dir = append(w.dir, h)
   368		if err := writeHeader(w.cw, fh); err != nil {
   369			return nil, err
   370		}
   371		// If we're creating a directory, fw is nil.
   372		w.last = fw
   373		return ow, nil
   374	}
   375	
   376	func writeHeader(w io.Writer, h *FileHeader) error {
   377		const maxUint16 = 1<<16 - 1
   378		if len(h.Name) > maxUint16 {
   379			return errLongName
   380		}
   381		if len(h.Extra) > maxUint16 {
   382			return errLongExtra
   383		}
   384	
   385		var buf [fileHeaderLen]byte
   386		b := writeBuf(buf[:])
   387		b.uint32(uint32(fileHeaderSignature))
   388		b.uint16(h.ReaderVersion)
   389		b.uint16(h.Flags)
   390		b.uint16(h.Method)
   391		b.uint16(h.ModifiedTime)
   392		b.uint16(h.ModifiedDate)
   393		b.uint32(0) // since we are writing a data descriptor crc32,
   394		b.uint32(0) // compressed size,
   395		b.uint32(0) // and uncompressed size should be zero
   396		b.uint16(uint16(len(h.Name)))
   397		b.uint16(uint16(len(h.Extra)))
   398		if _, err := w.Write(buf[:]); err != nil {
   399			return err
   400		}
   401		if _, err := io.WriteString(w, h.Name); err != nil {
   402			return err
   403		}
   404		_, err := w.Write(h.Extra)
   405		return err
   406	}
   407	
   408	// RegisterCompressor registers or overrides a custom compressor for a specific
   409	// method ID. If a compressor for a given method is not found, Writer will
   410	// default to looking up the compressor at the package level.
   411	func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
   412		if w.compressors == nil {
   413			w.compressors = make(map[uint16]Compressor)
   414		}
   415		w.compressors[method] = comp
   416	}
   417	
   418	func (w *Writer) compressor(method uint16) Compressor {
   419		comp := w.compressors[method]
   420		if comp == nil {
   421			comp = compressor(method)
   422		}
   423		return comp
   424	}
   425	
   426	type dirWriter struct{}
   427	
   428	func (dirWriter) Write(b []byte) (int, error) {
   429		if len(b) == 0 {
   430			return 0, nil
   431		}
   432		return 0, errors.New("zip: write to directory")
   433	}
   434	
   435	type fileWriter struct {
   436		*header
   437		zipw      io.Writer
   438		rawCount  *countWriter
   439		comp      io.WriteCloser
   440		compCount *countWriter
   441		crc32     hash.Hash32
   442		closed    bool
   443	}
   444	
   445	func (w *fileWriter) Write(p []byte) (int, error) {
   446		if w.closed {
   447			return 0, errors.New("zip: write to closed file")
   448		}
   449		w.crc32.Write(p)
   450		return w.rawCount.Write(p)
   451	}
   452	
   453	func (w *fileWriter) close() error {
   454		if w.closed {
   455			return errors.New("zip: file closed twice")
   456		}
   457		w.closed = true
   458		if err := w.comp.Close(); err != nil {
   459			return err
   460		}
   461	
   462		// update FileHeader
   463		fh := w.header.FileHeader
   464		fh.CRC32 = w.crc32.Sum32()
   465		fh.CompressedSize64 = uint64(w.compCount.count)
   466		fh.UncompressedSize64 = uint64(w.rawCount.count)
   467	
   468		if fh.isZip64() {
   469			fh.CompressedSize = uint32max
   470			fh.UncompressedSize = uint32max
   471			fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
   472		} else {
   473			fh.CompressedSize = uint32(fh.CompressedSize64)
   474			fh.UncompressedSize = uint32(fh.UncompressedSize64)
   475		}
   476	
   477		// Write data descriptor. This is more complicated than one would
   478		// think, see e.g. comments in zipfile.c:putextended() and
   479		// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
   480		// The approach here is to write 8 byte sizes if needed without
   481		// adding a zip64 extra in the local header (too late anyway).
   482		var buf []byte
   483		if fh.isZip64() {
   484			buf = make([]byte, dataDescriptor64Len)
   485		} else {
   486			buf = make([]byte, dataDescriptorLen)
   487		}
   488		b := writeBuf(buf)
   489		b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
   490		b.uint32(fh.CRC32)
   491		if fh.isZip64() {
   492			b.uint64(fh.CompressedSize64)
   493			b.uint64(fh.UncompressedSize64)
   494		} else {
   495			b.uint32(fh.CompressedSize)
   496			b.uint32(fh.UncompressedSize)
   497		}
   498		_, err := w.zipw.Write(buf)
   499		return err
   500	}
   501	
   502	type countWriter struct {
   503		w     io.Writer
   504		count int64
   505	}
   506	
   507	func (w *countWriter) Write(p []byte) (int, error) {
   508		n, err := w.w.Write(p)
   509		w.count += int64(n)
   510		return n, err
   511	}
   512	
   513	type nopCloser struct {
   514		io.Writer
   515	}
   516	
   517	func (w nopCloser) Close() error {
   518		return nil
   519	}
   520	
   521	type writeBuf []byte
   522	
   523	func (b *writeBuf) uint8(v uint8) {
   524		(*b)[0] = v
   525		*b = (*b)[1:]
   526	}
   527	
   528	func (b *writeBuf) uint16(v uint16) {
   529		binary.LittleEndian.PutUint16(*b, v)
   530		*b = (*b)[2:]
   531	}
   532	
   533	func (b *writeBuf) uint32(v uint32) {
   534		binary.LittleEndian.PutUint32(*b, v)
   535		*b = (*b)[4:]
   536	}
   537	
   538	func (b *writeBuf) uint64(v uint64) {
   539		binary.LittleEndian.PutUint64(*b, v)
   540		*b = (*b)[8:]
   541	}
   542	

View as plain text