Source file src/pkg/compress/flate/dict_decoder.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package flate
     6	
     7	// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
     8	// LZ77 decompresses data through sequences of two forms of commands:
     9	//
    10	//	* Literal insertions: Runs of one or more symbols are inserted into the data
    11	//	stream as is. This is accomplished through the writeByte method for a
    12	//	single symbol, or combinations of writeSlice/writeMark for multiple symbols.
    13	//	Any valid stream must start with a literal insertion if no preset dictionary
    14	//	is used.
    15	//
    16	//	* Backward copies: Runs of one or more symbols are copied from previously
    17	//	emitted data. Backward copies come as the tuple (dist, length) where dist
    18	//	determines how far back in the stream to copy from and length determines how
    19	//	many bytes to copy. Note that it is valid for the length to be greater than
    20	//	the distance. Since LZ77 uses forward copies, that situation is used to
    21	//	perform a form of run-length encoding on repeated runs of symbols.
    22	//	The writeCopy and tryWriteCopy are used to implement this command.
    23	//
    24	// For performance reasons, this implementation performs little to no sanity
    25	// checks about the arguments. As such, the invariants documented for each
    26	// method call must be respected.
    27	type dictDecoder struct {
    28		hist []byte // Sliding window history
    29	
    30		// Invariant: 0 <= rdPos <= wrPos <= len(hist)
    31		wrPos int  // Current output position in buffer
    32		rdPos int  // Have emitted hist[:rdPos] already
    33		full  bool // Has a full window length been written yet?
    34	}
    35	
    36	// init initializes dictDecoder to have a sliding window dictionary of the given
    37	// size. If a preset dict is provided, it will initialize the dictionary with
    38	// the contents of dict.
    39	func (dd *dictDecoder) init(size int, dict []byte) {
    40		*dd = dictDecoder{hist: dd.hist}
    41	
    42		if cap(dd.hist) < size {
    43			dd.hist = make([]byte, size)
    44		}
    45		dd.hist = dd.hist[:size]
    46	
    47		if len(dict) > len(dd.hist) {
    48			dict = dict[len(dict)-len(dd.hist):]
    49		}
    50		dd.wrPos = copy(dd.hist, dict)
    51		if dd.wrPos == len(dd.hist) {
    52			dd.wrPos = 0
    53			dd.full = true
    54		}
    55		dd.rdPos = dd.wrPos
    56	}
    57	
    58	// histSize reports the total amount of historical data in the dictionary.
    59	func (dd *dictDecoder) histSize() int {
    60		if dd.full {
    61			return len(dd.hist)
    62		}
    63		return dd.wrPos
    64	}
    65	
    66	// availRead reports the number of bytes that can be flushed by readFlush.
    67	func (dd *dictDecoder) availRead() int {
    68		return dd.wrPos - dd.rdPos
    69	}
    70	
    71	// availWrite reports the available amount of output buffer space.
    72	func (dd *dictDecoder) availWrite() int {
    73		return len(dd.hist) - dd.wrPos
    74	}
    75	
    76	// writeSlice returns a slice of the available buffer to write data to.
    77	//
    78	// This invariant will be kept: len(s) <= availWrite()
    79	func (dd *dictDecoder) writeSlice() []byte {
    80		return dd.hist[dd.wrPos:]
    81	}
    82	
    83	// writeMark advances the writer pointer by cnt.
    84	//
    85	// This invariant must be kept: 0 <= cnt <= availWrite()
    86	func (dd *dictDecoder) writeMark(cnt int) {
    87		dd.wrPos += cnt
    88	}
    89	
    90	// writeByte writes a single byte to the dictionary.
    91	//
    92	// This invariant must be kept: 0 < availWrite()
    93	func (dd *dictDecoder) writeByte(c byte) {
    94		dd.hist[dd.wrPos] = c
    95		dd.wrPos++
    96	}
    97	
    98	// writeCopy copies a string at a given (dist, length) to the output.
    99	// This returns the number of bytes copied and may be less than the requested
   100	// length if the available space in the output buffer is too small.
   101	//
   102	// This invariant must be kept: 0 < dist <= histSize()
   103	func (dd *dictDecoder) writeCopy(dist, length int) int {
   104		dstBase := dd.wrPos
   105		dstPos := dstBase
   106		srcPos := dstPos - dist
   107		endPos := dstPos + length
   108		if endPos > len(dd.hist) {
   109			endPos = len(dd.hist)
   110		}
   111	
   112		// Copy non-overlapping section after destination position.
   113		//
   114		// This section is non-overlapping in that the copy length for this section
   115		// is always less than or equal to the backwards distance. This can occur
   116		// if a distance refers to data that wraps-around in the buffer.
   117		// Thus, a backwards copy is performed here; that is, the exact bytes in
   118		// the source prior to the copy is placed in the destination.
   119		if srcPos < 0 {
   120			srcPos += len(dd.hist)
   121			dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
   122			srcPos = 0
   123		}
   124	
   125		// Copy possibly overlapping section before destination position.
   126		//
   127		// This section can overlap if the copy length for this section is larger
   128		// than the backwards distance. This is allowed by LZ77 so that repeated
   129		// strings can be succinctly represented using (dist, length) pairs.
   130		// Thus, a forwards copy is performed here; that is, the bytes copied is
   131		// possibly dependent on the resulting bytes in the destination as the copy
   132		// progresses along. This is functionally equivalent to the following:
   133		//
   134		//	for i := 0; i < endPos-dstPos; i++ {
   135		//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
   136		//	}
   137		//	dstPos = endPos
   138		//
   139		for dstPos < endPos {
   140			dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
   141		}
   142	
   143		dd.wrPos = dstPos
   144		return dstPos - dstBase
   145	}
   146	
   147	// tryWriteCopy tries to copy a string at a given (distance, length) to the
   148	// output. This specialized version is optimized for short distances.
   149	//
   150	// This method is designed to be inlined for performance reasons.
   151	//
   152	// This invariant must be kept: 0 < dist <= histSize()
   153	func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
   154		dstPos := dd.wrPos
   155		endPos := dstPos + length
   156		if dstPos < dist || endPos > len(dd.hist) {
   157			return 0
   158		}
   159		dstBase := dstPos
   160		srcPos := dstPos - dist
   161	
   162		// Copy possibly overlapping section before destination position.
   163	loop:
   164		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
   165		if dstPos < endPos {
   166			goto loop // Avoid for-loop so that this function can be inlined
   167		}
   168	
   169		dd.wrPos = dstPos
   170		return dstPos - dstBase
   171	}
   172	
   173	// readFlush returns a slice of the historical buffer that is ready to be
   174	// emitted to the user. The data returned by readFlush must be fully consumed
   175	// before calling any other dictDecoder methods.
   176	func (dd *dictDecoder) readFlush() []byte {
   177		toRead := dd.hist[dd.rdPos:dd.wrPos]
   178		dd.rdPos = dd.wrPos
   179		if dd.wrPos == len(dd.hist) {
   180			dd.wrPos, dd.rdPos = 0, 0
   181			dd.full = true
   182		}
   183		return toRead
   184	}
   185
View as plain text