...

Source file src/pkg/compress/flate/huffman_code.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package flate
     6	
     7	import (
     8		"math"
     9		"math/bits"
    10		"sort"
    11	)
    12	
    13	// hcode is a huffman code with a bit code and bit length.
    14	type hcode struct {
    15		code, len uint16
    16	}
    17	
    18	type huffmanEncoder struct {
    19		codes     []hcode
    20		freqcache []literalNode
    21		bitCount  [17]int32
    22		lns       byLiteral // stored to avoid repeated allocation in generate
    23		lfs       byFreq    // stored to avoid repeated allocation in generate
    24	}
    25	
    26	type literalNode struct {
    27		literal uint16
    28		freq    int32
    29	}
    30	
    31	// A levelInfo describes the state of the constructed tree for a given depth.
    32	type levelInfo struct {
    33		// Our level.  for better printing
    34		level int32
    35	
    36		// The frequency of the last node at this level
    37		lastFreq int32
    38	
    39		// The frequency of the next character to add to this level
    40		nextCharFreq int32
    41	
    42		// The frequency of the next pair (from level below) to add to this level.
    43		// Only valid if the "needed" value of the next lower level is 0.
    44		nextPairFreq int32
    45	
    46		// The number of chains remaining to generate for this level before moving
    47		// up to the next level
    48		needed int32
    49	}
    50	
    51	// set sets the code and length of an hcode.
    52	func (h *hcode) set(code uint16, length uint16) {
    53		h.len = length
    54		h.code = code
    55	}
    56	
    57	func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
    58	
    59	func newHuffmanEncoder(size int) *huffmanEncoder {
    60		return &huffmanEncoder{codes: make([]hcode, size)}
    61	}
    62	
    63	// Generates a HuffmanCode corresponding to the fixed literal table
    64	func generateFixedLiteralEncoding() *huffmanEncoder {
    65		h := newHuffmanEncoder(maxNumLit)
    66		codes := h.codes
    67		var ch uint16
    68		for ch = 0; ch < maxNumLit; ch++ {
    69			var bits uint16
    70			var size uint16
    71			switch {
    72			case ch < 144:
    73				// size 8, 000110000  .. 10111111
    74				bits = ch + 48
    75				size = 8
    76				break
    77			case ch < 256:
    78				// size 9, 110010000 .. 111111111
    79				bits = ch + 400 - 144
    80				size = 9
    81				break
    82			case ch < 280:
    83				// size 7, 0000000 .. 0010111
    84				bits = ch - 256
    85				size = 7
    86				break
    87			default:
    88				// size 8, 11000000 .. 11000111
    89				bits = ch + 192 - 280
    90				size = 8
    91			}
    92			codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
    93		}
    94		return h
    95	}
    96	
    97	func generateFixedOffsetEncoding() *huffmanEncoder {
    98		h := newHuffmanEncoder(30)
    99		codes := h.codes
   100		for ch := range codes {
   101			codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
   102		}
   103		return h
   104	}
   105	
   106	var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
   107	var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
   108	
   109	func (h *huffmanEncoder) bitLength(freq []int32) int {
   110		var total int
   111		for i, f := range freq {
   112			if f != 0 {
   113				total += int(f) * int(h.codes[i].len)
   114			}
   115		}
   116		return total
   117	}
   118	
   119	const maxBitsLimit = 16
   120	
   121	// Return the number of literals assigned to each bit size in the Huffman encoding
   122	//
   123	// This method is only called when list.length >= 3
   124	// The cases of 0, 1, and 2 literals are handled by special case code.
   125	//
   126	// list  An array of the literals with non-zero frequencies
   127	//             and their associated frequencies. The array is in order of increasing
   128	//             frequency, and has as its last element a special element with frequency
   129	//             MaxInt32
   130	// maxBits     The maximum number of bits that should be used to encode any literal.
   131	//             Must be less than 16.
   132	// return      An integer array in which array[i] indicates the number of literals
   133	//             that should be encoded in i bits.
   134	func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
   135		if maxBits >= maxBitsLimit {
   136			panic("flate: maxBits too large")
   137		}
   138		n := int32(len(list))
   139		list = list[0 : n+1]
   140		list[n] = maxNode()
   141	
   142		// The tree can't have greater depth than n - 1, no matter what. This
   143		// saves a little bit of work in some small cases
   144		if maxBits > n-1 {
   145			maxBits = n - 1
   146		}
   147	
   148		// Create information about each of the levels.
   149		// A bogus "Level 0" whose sole purpose is so that
   150		// level1.prev.needed==0.  This makes level1.nextPairFreq
   151		// be a legitimate value that never gets chosen.
   152		var levels [maxBitsLimit]levelInfo
   153		// leafCounts[i] counts the number of literals at the left
   154		// of ancestors of the rightmost node at level i.
   155		// leafCounts[i][j] is the number of literals at the left
   156		// of the level j ancestor.
   157		var leafCounts [maxBitsLimit][maxBitsLimit]int32
   158	
   159		for level := int32(1); level <= maxBits; level++ {
   160			// For every level, the first two items are the first two characters.
   161			// We initialize the levels as if we had already figured this out.
   162			levels[level] = levelInfo{
   163				level:        level,
   164				lastFreq:     list[1].freq,
   165				nextCharFreq: list[2].freq,
   166				nextPairFreq: list[0].freq + list[1].freq,
   167			}
   168			leafCounts[level][level] = 2
   169			if level == 1 {
   170				levels[level].nextPairFreq = math.MaxInt32
   171			}
   172		}
   173	
   174		// We need a total of 2*n - 2 items at top level and have already generated 2.
   175		levels[maxBits].needed = 2*n - 4
   176	
   177		level := maxBits
   178		for {
   179			l := &levels[level]
   180			if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
   181				// We've run out of both leafs and pairs.
   182				// End all calculations for this level.
   183				// To make sure we never come back to this level or any lower level,
   184				// set nextPairFreq impossibly large.
   185				l.needed = 0
   186				levels[level+1].nextPairFreq = math.MaxInt32
   187				level++
   188				continue
   189			}
   190	
   191			prevFreq := l.lastFreq
   192			if l.nextCharFreq < l.nextPairFreq {
   193				// The next item on this row is a leaf node.
   194				n := leafCounts[level][level] + 1
   195				l.lastFreq = l.nextCharFreq
   196				// Lower leafCounts are the same of the previous node.
   197				leafCounts[level][level] = n
   198				l.nextCharFreq = list[n].freq
   199			} else {
   200				// The next item on this row is a pair from the previous row.
   201				// nextPairFreq isn't valid until we generate two
   202				// more values in the level below
   203				l.lastFreq = l.nextPairFreq
   204				// Take leaf counts from the lower level, except counts[level] remains the same.
   205				copy(leafCounts[level][:level], leafCounts[level-1][:level])
   206				levels[l.level-1].needed = 2
   207			}
   208	
   209			if l.needed--; l.needed == 0 {
   210				// We've done everything we need to do for this level.
   211				// Continue calculating one level up. Fill in nextPairFreq
   212				// of that level with the sum of the two nodes we've just calculated on
   213				// this level.
   214				if l.level == maxBits {
   215					// All done!
   216					break
   217				}
   218				levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
   219				level++
   220			} else {
   221				// If we stole from below, move down temporarily to replenish it.
   222				for levels[level-1].needed > 0 {
   223					level--
   224				}
   225			}
   226		}
   227	
   228		// Somethings is wrong if at the end, the top level is null or hasn't used
   229		// all of the leaves.
   230		if leafCounts[maxBits][maxBits] != n {
   231			panic("leafCounts[maxBits][maxBits] != n")
   232		}
   233	
   234		bitCount := h.bitCount[:maxBits+1]
   235		bits := 1
   236		counts := &leafCounts[maxBits]
   237		for level := maxBits; level > 0; level-- {
   238			// chain.leafCount gives the number of literals requiring at least "bits"
   239			// bits to encode.
   240			bitCount[bits] = counts[level] - counts[level-1]
   241			bits++
   242		}
   243		return bitCount
   244	}
   245	
   246	// Look at the leaves and assign them a bit count and an encoding as specified
   247	// in RFC 1951 3.2.2
   248	func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
   249		code := uint16(0)
   250		for n, bits := range bitCount {
   251			code <<= 1
   252			if n == 0 || bits == 0 {
   253				continue
   254			}
   255			// The literals list[len(list)-bits] .. list[len(list)-bits]
   256			// are encoded using "bits" bits, and get the values
   257			// code, code + 1, ....  The code values are
   258			// assigned in literal order (not frequency order).
   259			chunk := list[len(list)-int(bits):]
   260	
   261			h.lns.sort(chunk)
   262			for _, node := range chunk {
   263				h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
   264				code++
   265			}
   266			list = list[0 : len(list)-int(bits)]
   267		}
   268	}
   269	
   270	// Update this Huffman Code object to be the minimum code for the specified frequency count.
   271	//
   272	// freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
   273	// maxBits  The maximum number of bits to use for any literal.
   274	func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
   275		if h.freqcache == nil {
   276			// Allocate a reusable buffer with the longest possible frequency table.
   277			// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
   278			// The largest of these is maxNumLit, so we allocate for that case.
   279			h.freqcache = make([]literalNode, maxNumLit+1)
   280		}
   281		list := h.freqcache[:len(freq)+1]
   282		// Number of non-zero literals
   283		count := 0
   284		// Set list to be the set of all non-zero literals and their frequencies
   285		for i, f := range freq {
   286			if f != 0 {
   287				list[count] = literalNode{uint16(i), f}
   288				count++
   289			} else {
   290				list[count] = literalNode{}
   291				h.codes[i].len = 0
   292			}
   293		}
   294		list[len(freq)] = literalNode{}
   295	
   296		list = list[:count]
   297		if count <= 2 {
   298			// Handle the small cases here, because they are awkward for the general case code. With
   299			// two or fewer literals, everything has bit length 1.
   300			for i, node := range list {
   301				// "list" is in order of increasing literal value.
   302				h.codes[node.literal].set(uint16(i), 1)
   303			}
   304			return
   305		}
   306		h.lfs.sort(list)
   307	
   308		// Get the number of literals for each bit count
   309		bitCount := h.bitCounts(list, maxBits)
   310		// And do the assignment
   311		h.assignEncodingAndSize(bitCount, list)
   312	}
   313	
   314	type byLiteral []literalNode
   315	
   316	func (s *byLiteral) sort(a []literalNode) {
   317		*s = byLiteral(a)
   318		sort.Sort(s)
   319	}
   320	
   321	func (s byLiteral) Len() int { return len(s) }
   322	
   323	func (s byLiteral) Less(i, j int) bool {
   324		return s[i].literal < s[j].literal
   325	}
   326	
   327	func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
   328	
   329	type byFreq []literalNode
   330	
   331	func (s *byFreq) sort(a []literalNode) {
   332		*s = byFreq(a)
   333		sort.Sort(s)
   334	}
   335	
   336	func (s byFreq) Len() int { return len(s) }
   337	
   338	func (s byFreq) Less(i, j int) bool {
   339		if s[i].freq == s[j].freq {
   340			return s[i].literal < s[j].literal
   341		}
   342		return s[i].freq < s[j].freq
   343	}
   344	
   345	func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
   346	
   347	func reverseBits(number uint16, bitLength byte) uint16 {
   348		return bits.Reverse16(number << (16 - bitLength))
   349	}
   350	

View as plain text