...

Source file src/pkg/compress/bzip2/huffman.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package bzip2
     6	
     7	import "sort"
     8	
     9	// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
    10	// symbol.
    11	type huffmanTree struct {
    12		// nodes contains all the non-leaf nodes in the tree. nodes[0] is the
    13		// root of the tree and nextNode contains the index of the next element
    14		// of nodes to use when the tree is being constructed.
    15		nodes    []huffmanNode
    16		nextNode int
    17	}
    18	
    19	// A huffmanNode is a node in the tree. left and right contain indexes into the
    20	// nodes slice of the tree. If left or right is invalidNodeValue then the child
    21	// is a left node and its value is in leftValue/rightValue.
    22	//
    23	// The symbols are uint16s because bzip2 encodes not only MTF indexes in the
    24	// tree, but also two magic values for run-length encoding and an EOF symbol.
    25	// Thus there are more than 256 possible symbols.
    26	type huffmanNode struct {
    27		left, right           uint16
    28		leftValue, rightValue uint16
    29	}
    30	
    31	// invalidNodeValue is an invalid index which marks a leaf node in the tree.
    32	const invalidNodeValue = 0xffff
    33	
    34	// Decode reads bits from the given bitReader and navigates the tree until a
    35	// symbol is found.
    36	func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
    37		nodeIndex := uint16(0) // node 0 is the root of the tree.
    38	
    39		for {
    40			node := &t.nodes[nodeIndex]
    41	
    42			var bit uint16
    43			if br.bits > 0 {
    44				// Get next bit - fast path.
    45				br.bits--
    46				bit = uint16(br.n>>(br.bits&63)) & 1
    47			} else {
    48				// Get next bit - slow path.
    49				// Use ReadBits to retrieve a single bit
    50				// from the underling io.ByteReader.
    51				bit = uint16(br.ReadBits(1))
    52			}
    53	
    54			// Trick a compiler into generating conditional move instead of branch,
    55			// by making both loads unconditional.
    56			l, r := node.left, node.right
    57	
    58			if bit == 1 {
    59				nodeIndex = l
    60			} else {
    61				nodeIndex = r
    62			}
    63	
    64			if nodeIndex == invalidNodeValue {
    65				// We found a leaf. Use the value of bit to decide
    66				// whether is a left or a right value.
    67				l, r := node.leftValue, node.rightValue
    68				if bit == 1 {
    69					v = l
    70				} else {
    71					v = r
    72				}
    73				return
    74			}
    75		}
    76	}
    77	
    78	// newHuffmanTree builds a Huffman tree from a slice containing the code
    79	// lengths of each symbol. The maximum code length is 32 bits.
    80	func newHuffmanTree(lengths []uint8) (huffmanTree, error) {
    81		// There are many possible trees that assign the same code length to
    82		// each symbol (consider reflecting a tree down the middle, for
    83		// example). Since the code length assignments determine the
    84		// efficiency of the tree, each of these trees is equally good. In
    85		// order to minimize the amount of information needed to build a tree
    86		// bzip2 uses a canonical tree so that it can be reconstructed given
    87		// only the code length assignments.
    88	
    89		if len(lengths) < 2 {
    90			panic("newHuffmanTree: too few symbols")
    91		}
    92	
    93		var t huffmanTree
    94	
    95		// First we sort the code length assignments by ascending code length,
    96		// using the symbol value to break ties.
    97		pairs := make([]huffmanSymbolLengthPair, len(lengths))
    98		for i, length := range lengths {
    99			pairs[i].value = uint16(i)
   100			pairs[i].length = length
   101		}
   102	
   103		sort.Slice(pairs, func(i, j int) bool {
   104			if pairs[i].length < pairs[j].length {
   105				return true
   106			}
   107			if pairs[i].length > pairs[j].length {
   108				return false
   109			}
   110			if pairs[i].value < pairs[j].value {
   111				return true
   112			}
   113			return false
   114		})
   115	
   116		// Now we assign codes to the symbols, starting with the longest code.
   117		// We keep the codes packed into a uint32, at the most-significant end.
   118		// So branches are taken from the MSB downwards. This makes it easy to
   119		// sort them later.
   120		code := uint32(0)
   121		length := uint8(32)
   122	
   123		codes := make([]huffmanCode, len(lengths))
   124		for i := len(pairs) - 1; i >= 0; i-- {
   125			if length > pairs[i].length {
   126				length = pairs[i].length
   127			}
   128			codes[i].code = code
   129			codes[i].codeLen = length
   130			codes[i].value = pairs[i].value
   131			// We need to 'increment' the code, which means treating |code|
   132			// like a |length| bit number.
   133			code += 1 << (32 - length)
   134		}
   135	
   136		// Now we can sort by the code so that the left half of each branch are
   137		// grouped together, recursively.
   138		sort.Slice(codes, func(i, j int) bool {
   139			return codes[i].code < codes[j].code
   140		})
   141	
   142		t.nodes = make([]huffmanNode, len(codes))
   143		_, err := buildHuffmanNode(&t, codes, 0)
   144		return t, err
   145	}
   146	
   147	// huffmanSymbolLengthPair contains a symbol and its code length.
   148	type huffmanSymbolLengthPair struct {
   149		value  uint16
   150		length uint8
   151	}
   152	
   153	// huffmanCode contains a symbol, its code and code length.
   154	type huffmanCode struct {
   155		code    uint32
   156		codeLen uint8
   157		value   uint16
   158	}
   159	
   160	// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
   161	// the Huffman tree at the given level. It returns the index of the newly
   162	// constructed node.
   163	func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error) {
   164		test := uint32(1) << (31 - level)
   165	
   166		// We have to search the list of codes to find the divide between the left and right sides.
   167		firstRightIndex := len(codes)
   168		for i, code := range codes {
   169			if code.code&test != 0 {
   170				firstRightIndex = i
   171				break
   172			}
   173		}
   174	
   175		left := codes[:firstRightIndex]
   176		right := codes[firstRightIndex:]
   177	
   178		if len(left) == 0 || len(right) == 0 {
   179			// There is a superfluous level in the Huffman tree indicating
   180			// a bug in the encoder. However, this bug has been observed in
   181			// the wild so we handle it.
   182	
   183			// If this function was called recursively then we know that
   184			// len(codes) >= 2 because, otherwise, we would have hit the
   185			// "leaf node" case, below, and not recursed.
   186			//
   187			// However, for the initial call it's possible that len(codes)
   188			// is zero or one. Both cases are invalid because a zero length
   189			// tree cannot encode anything and a length-1 tree can only
   190			// encode EOF and so is superfluous. We reject both.
   191			if len(codes) < 2 {
   192				return 0, StructuralError("empty Huffman tree")
   193			}
   194	
   195			// In this case the recursion doesn't always reduce the length
   196			// of codes so we need to ensure termination via another
   197			// mechanism.
   198			if level == 31 {
   199				// Since len(codes) >= 2 the only way that the values
   200				// can match at all 32 bits is if they are equal, which
   201				// is invalid. This ensures that we never enter
   202				// infinite recursion.
   203				return 0, StructuralError("equal symbols in Huffman tree")
   204			}
   205	
   206			if len(left) == 0 {
   207				return buildHuffmanNode(t, right, level+1)
   208			}
   209			return buildHuffmanNode(t, left, level+1)
   210		}
   211	
   212		nodeIndex = uint16(t.nextNode)
   213		node := &t.nodes[t.nextNode]
   214		t.nextNode++
   215	
   216		if len(left) == 1 {
   217			// leaf node
   218			node.left = invalidNodeValue
   219			node.leftValue = left[0].value
   220		} else {
   221			node.left, err = buildHuffmanNode(t, left, level+1)
   222		}
   223	
   224		if err != nil {
   225			return
   226		}
   227	
   228		if len(right) == 1 {
   229			// leaf node
   230			node.right = invalidNodeValue
   231			node.rightValue = right[0].value
   232		} else {
   233			node.right, err = buildHuffmanNode(t, right, level+1)
   234		}
   235	
   236		return
   237	}
   238	

View as plain text