Initial commit: Go 1.23 release state

Vorapol Rinsatitnon
2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions

src/compress/bzip2/bit_reader.go Normal file

@@ -0,0 +1,82 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bzip2
import (
"bufio"
"io"
)
// bitReader wraps an io.Reader and provides the ability to read values,
// bit-by-bit, from it. Its Read* methods don't return the usual error
// because the error handling was verbose. Instead, any error is kept and can
// be checked afterwards.
type bitReader struct {
r io.ByteReader
n uint64
bits uint
err error
}
// newBitReader returns a new bitReader reading from r. If r is not
// already an io.ByteReader, it will be converted via a bufio.Reader.
func newBitReader(r io.Reader) bitReader {
byter, ok := r.(io.ByteReader)
if !ok {
byter = bufio.NewReader(r)
}
return bitReader{r: byter}
}
// ReadBits64 reads the given number of bits and returns them in the
// least-significant part of a uint64. In the event of an error, it returns 0
// and the error can be obtained by calling bitReader.Err().
func (br *bitReader) ReadBits64(bits uint) (n uint64) {
for bits > br.bits {
b, err := br.r.ReadByte()
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
if err != nil {
br.err = err
return 0
}
br.n <<= 8
br.n |= uint64(b)
br.bits += 8
}
// br.n looks like this (assuming that br.bits = 14 and bits = 6):
//
// Bit: 111111
//      5432109876543210
//
//        (6 bits, the desired output)
//        |----|
//        V    V
//      0101101101001110
//        ^            ^
//        |------------|
//         br.bits (num valid bits)
//
// The next line right-shifts the desired bits into the
// least-significant places and masks off anything above.
n = (br.n >> (br.bits - bits)) & ((1 << bits) - 1)
br.bits -= bits
return
}
func (br *bitReader) ReadBits(bits uint) (n int) {
n64 := br.ReadBits64(bits)
return int(n64)
}
func (br *bitReader) ReadBit() bool {
n := br.ReadBits(1)
return n != 0
}
func (br *bitReader) Err() error {
return br.err
}
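// Illustrative sketch (not part of the original file): the shift-and-mask
// step from ReadBits64, applied to the values in the diagram above. With
// 14 valid bits in the accumulator and 6 bits requested, the result is the
// top 6 of the 14 valid bits.
func exampleShiftMask() uint64 {
	n := uint64(0b01101101001110) // accumulator holding 14 valid bits
	bits, valid := uint(6), uint(14)
	return (n >> (valid - bits)) & ((1 << bits) - 1) // == 0b011011
}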

src/compress/bzip2/bzip2.go Normal file

@@ -0,0 +1,500 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bzip2 implements bzip2 decompression.
package bzip2
import "io"
// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot
// of guessing: https://en.wikipedia.org/wiki/Bzip2
// The source code to pyflate was useful for debugging:
// http://www.paul.sladen.org/projects/pyflate
// A StructuralError is returned when the bzip2 data is found to be
// syntactically invalid.
type StructuralError string
func (s StructuralError) Error() string {
return "bzip2 data invalid: " + string(s)
}
// A reader decompresses bzip2 compressed data.
type reader struct {
br bitReader
fileCRC uint32
blockCRC uint32
wantBlockCRC uint32
setupDone bool // true if we have parsed the bzip2 header.
eof bool
blockSize int // blockSize in bytes, e.g. 900 * 1000.
c [256]uint // the ``C'' array for the inverse BWT.
tt []uint32 // mirrors the ``tt'' array in the bzip2 source and contains the P array in the upper 24 bits.
tPos uint32 // Index of the next output byte in tt.
preRLE []uint32 // contains the RLE data still to be processed.
preRLEUsed int // number of entries of preRLE used.
lastByte int // the last byte value seen.
byteRepeats uint // the number of repeats of lastByte seen.
repeats uint // the number of copies of lastByte to output.
}
// NewReader returns an io.Reader which decompresses bzip2 data from r.
// If r does not also implement [io.ByteReader],
// the decompressor may read more data than necessary from r.
func NewReader(r io.Reader) io.Reader {
bz2 := new(reader)
bz2.br = newBitReader(r)
return bz2
}
const bzip2FileMagic = 0x425a // "BZ"
const bzip2BlockMagic = 0x314159265359
const bzip2FinalMagic = 0x177245385090
// setup parses the bzip2 header.
func (bz2 *reader) setup(needMagic bool) error {
br := &bz2.br
if needMagic {
magic := br.ReadBits(16)
if magic != bzip2FileMagic {
return StructuralError("bad magic value")
}
}
t := br.ReadBits(8)
if t != 'h' {
return StructuralError("non-Huffman entropy encoding")
}
level := br.ReadBits(8)
if level < '1' || level > '9' {
return StructuralError("invalid compression level")
}
bz2.fileCRC = 0
bz2.blockSize = 100 * 1000 * (level - '0')
if bz2.blockSize > len(bz2.tt) {
bz2.tt = make([]uint32, bz2.blockSize)
}
return nil
}
func (bz2 *reader) Read(buf []byte) (n int, err error) {
if bz2.eof {
return 0, io.EOF
}
if !bz2.setupDone {
err = bz2.setup(true)
brErr := bz2.br.Err()
if brErr != nil {
err = brErr
}
if err != nil {
return 0, err
}
bz2.setupDone = true
}
n, err = bz2.read(buf)
brErr := bz2.br.Err()
if brErr != nil {
err = brErr
}
return
}
func (bz2 *reader) readFromBlock(buf []byte) int {
// bzip2 is a block based compressor, except that it has a run-length
// preprocessing step. The block based nature means that we can
// preallocate fixed-size buffers and reuse them. However, the RLE
// preprocessing would require allocating huge buffers to store the
// maximum expansion. Thus we process blocks all at once, except for
// the RLE which we decompress as required.
n := 0
for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {
// We have RLE data pending.
// The run-length encoding works like this:
// Any sequence of four equal bytes is followed by a length
// byte which contains the number of repeats of that byte to
// include. (The number of repeats can be zero.) Because we are
// decompressing on-demand our state is kept in the reader
// object.
if bz2.repeats > 0 {
buf[n] = byte(bz2.lastByte)
n++
bz2.repeats--
if bz2.repeats == 0 {
bz2.lastByte = -1
}
continue
}
bz2.tPos = bz2.preRLE[bz2.tPos]
b := byte(bz2.tPos)
bz2.tPos >>= 8
bz2.preRLEUsed++
if bz2.byteRepeats == 3 {
bz2.repeats = uint(b)
bz2.byteRepeats = 0
continue
}
if bz2.lastByte == int(b) {
bz2.byteRepeats++
} else {
bz2.byteRepeats = 0
}
bz2.lastByte = int(b)
buf[n] = b
n++
}
return n
}
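// Illustrative sketch (not part of the original file): the RLE1 format that
// readFromBlock decodes incrementally above, shown as a one-shot decoder.
// Any run of four equal bytes is followed by a count byte holding the number
// of extra repeats, so "AAAA\x03" expands to seven 'A's and "BBBB\x00" to
// exactly four 'B's.
func exampleRLE1(in []byte) []byte {
	var out []byte
	run, last := 0, -1
	for i := 0; i < len(in); i++ {
		if run == 4 {
			// in[i] is a repeat count, not a literal byte.
			for j := 0; j < int(in[i]); j++ {
				out = append(out, byte(last))
			}
			run, last = 0, -1
			continue
		}
		if int(in[i]) == last {
			run++
		} else {
			run = 1
		}
		last = int(in[i])
		out = append(out, in[i])
	}
	return out
}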
func (bz2 *reader) read(buf []byte) (int, error) {
for {
n := bz2.readFromBlock(buf)
if n > 0 || len(buf) == 0 {
bz2.blockCRC = updateCRC(bz2.blockCRC, buf[:n])
return n, nil
}
// End of block. Check CRC.
if bz2.blockCRC != bz2.wantBlockCRC {
bz2.br.err = StructuralError("block checksum mismatch")
return 0, bz2.br.err
}
// Find next block.
br := &bz2.br
switch br.ReadBits64(48) {
default:
return 0, StructuralError("bad magic value found")
case bzip2BlockMagic:
// Start of block.
err := bz2.readBlock()
if err != nil {
return 0, err
}
case bzip2FinalMagic:
// Check end-of-file CRC.
wantFileCRC := uint32(br.ReadBits64(32))
if br.err != nil {
return 0, br.err
}
if bz2.fileCRC != wantFileCRC {
br.err = StructuralError("file checksum mismatch")
return 0, br.err
}
// Skip ahead to byte boundary.
// Is there a file concatenated to this one?
// It would start with BZ.
if br.bits%8 != 0 {
br.ReadBits(br.bits % 8)
}
b, err := br.r.ReadByte()
if err == io.EOF {
br.err = io.EOF
bz2.eof = true
return 0, io.EOF
}
if err != nil {
br.err = err
return 0, err
}
z, err := br.r.ReadByte()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
br.err = err
return 0, err
}
if b != 'B' || z != 'Z' {
return 0, StructuralError("bad magic value in continuation file")
}
if err := bz2.setup(false); err != nil {
return 0, err
}
}
}
}
// readBlock reads a bzip2 block. The magic number should already have been consumed.
func (bz2 *reader) readBlock() (err error) {
br := &bz2.br
bz2.wantBlockCRC = uint32(br.ReadBits64(32)) // CRC32 of the uncompressed block contents; checked against blockCRC in read.
bz2.blockCRC = 0
bz2.fileCRC = (bz2.fileCRC<<1 | bz2.fileCRC>>31) ^ bz2.wantBlockCRC
randomized := br.ReadBits(1)
if randomized != 0 {
return StructuralError("deprecated randomized files")
}
origPtr := uint(br.ReadBits(24))
// If not every byte value is used in the block (i.e., it's text) then
// the symbol set is reduced. The symbols used are stored as a
// two-level, 16x16 bitmap.
symbolRangeUsedBitmap := br.ReadBits(16)
symbolPresent := make([]bool, 256)
numSymbols := 0
for symRange := uint(0); symRange < 16; symRange++ {
if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 {
bits := br.ReadBits(16)
for symbol := uint(0); symbol < 16; symbol++ {
if bits&(1<<(15-symbol)) != 0 {
symbolPresent[16*symRange+symbol] = true
numSymbols++
}
}
}
}
if numSymbols == 0 {
// There must be an EOF symbol.
return StructuralError("no symbols in input")
}
// A block uses between two and six different Huffman trees.
numHuffmanTrees := br.ReadBits(3)
if numHuffmanTrees < 2 || numHuffmanTrees > 6 {
return StructuralError("invalid number of Huffman trees")
}
// The Huffman tree can switch every 50 symbols so there's a list of
// tree indexes telling us which tree to use for each 50 symbol block.
numSelectors := br.ReadBits(15)
treeIndexes := make([]uint8, numSelectors)
// The tree indexes are move-to-front transformed and stored as unary
// numbers.
mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees)
for i := range treeIndexes {
c := 0
for {
inc := br.ReadBits(1)
if inc == 0 {
break
}
c++
}
if c >= numHuffmanTrees {
return StructuralError("tree index too large")
}
treeIndexes[i] = mtfTreeDecoder.Decode(c)
}
// The list of symbols for the move-to-front transform is taken from
// the previously decoded symbol bitmap.
symbols := make([]byte, numSymbols)
nextSymbol := 0
for i := 0; i < 256; i++ {
if symbolPresent[i] {
symbols[nextSymbol] = byte(i)
nextSymbol++
}
}
mtf := newMTFDecoder(symbols)
numSymbols += 2 // to account for RUNA and RUNB symbols
huffmanTrees := make([]huffmanTree, numHuffmanTrees)
// Now we decode the arrays of code-lengths for each tree.
lengths := make([]uint8, numSymbols)
for i := range huffmanTrees {
// The code lengths are delta encoded from a 5-bit base value.
length := br.ReadBits(5)
for j := range lengths {
for {
if length < 1 || length > 20 {
return StructuralError("Huffman length out of range")
}
if !br.ReadBit() {
break
}
if br.ReadBit() {
length--
} else {
length++
}
}
lengths[j] = uint8(length)
}
huffmanTrees[i], err = newHuffmanTree(lengths)
if err != nil {
return err
}
}
selectorIndex := 1 // the next tree index to use
if len(treeIndexes) == 0 {
return StructuralError("no tree selectors given")
}
if int(treeIndexes[0]) >= len(huffmanTrees) {
return StructuralError("tree selector out of range")
}
currentHuffmanTree := huffmanTrees[treeIndexes[0]]
bufIndex := 0 // indexes bz2.buf, the output buffer.
// The output of the move-to-front transform is run-length encoded and
// we merge the decoding into the Huffman parsing loop. These two
// variables accumulate the repeat count. See the Wikipedia page for
// details.
repeat := 0
repeatPower := 0
// The `C' array (used by the inverse BWT) needs to be zero initialized.
clear(bz2.c[:])
decoded := 0 // counts the number of symbols decoded by the current tree.
for {
if decoded == 50 {
if selectorIndex >= numSelectors {
return StructuralError("insufficient selector indices for number of symbols")
}
if int(treeIndexes[selectorIndex]) >= len(huffmanTrees) {
return StructuralError("tree selector out of range")
}
currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]]
selectorIndex++
decoded = 0
}
v := currentHuffmanTree.Decode(br)
decoded++
if v < 2 {
// This is either the RUNA or RUNB symbol.
if repeat == 0 {
repeatPower = 1
}
repeat += repeatPower << v
repeatPower <<= 1
// This limit of 2 million comes from the bzip2 source
// code. It prevents repeat from overflowing.
if repeat > 2*1024*1024 {
return StructuralError("repeat count too large")
}
continue
}
if repeat > 0 {
// We have decoded a complete run-length so we need to
// replicate the last output symbol.
if repeat > bz2.blockSize-bufIndex {
return StructuralError("repeats past end of block")
}
for i := 0; i < repeat; i++ {
b := mtf.First()
bz2.tt[bufIndex] = uint32(b)
bz2.c[b]++
bufIndex++
}
repeat = 0
}
if int(v) == numSymbols-1 {
// This is the EOF symbol. Because it's always at the
// end of the move-to-front list, and never gets moved
// to the front, it has this unique value.
break
}
// Since two metasymbols (RUNA and RUNB) have values 0 and 1,
// one would expect |v-2| to be passed to the MTF decoder.
// However, the front of the MTF list is never referenced as 0,
// it's always referenced with a run-length of 1. Thus 0
// doesn't need to be encoded and we have |v-1| in the next
// line.
b := mtf.Decode(int(v - 1))
if bufIndex >= bz2.blockSize {
return StructuralError("data exceeds block size")
}
bz2.tt[bufIndex] = uint32(b)
bz2.c[b]++
bufIndex++
}
if origPtr >= uint(bufIndex) {
return StructuralError("origPtr out of bounds")
}
// We have completed the entropy decoding. Now we can perform the
// inverse BWT and setup the RLE buffer.
bz2.preRLE = bz2.tt[:bufIndex]
bz2.preRLEUsed = 0
bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])
bz2.lastByte = -1
bz2.byteRepeats = 0
bz2.repeats = 0
return nil
}
// inverseBWT implements the inverse Burrows-Wheeler transform as described in
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
// In that document, origPtr is called “I” and c is the “C” array after the
// first pass over the data. It's an argument here because we merge the first
// pass with the Huffman decoding.
//
// This also implements the “single array” method from the bzip2 source code
// which leaves the output, still shuffled, in the bottom 8 bits of tt with the
// index of the next byte in the top 24-bits. The index of the first byte is
// returned.
func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
sum := uint(0)
for i := 0; i < 256; i++ {
sum += c[i]
c[i] = sum - c[i]
}
for i := range tt {
b := tt[i] & 0xff
tt[c[b]] |= uint32(i) << 8
c[b]++
}
return tt[origPtr] >> 8
}
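// Illustrative sketch (not part of the original file): driving inverseBWT by
// hand, mirroring how readBlock and readFromBlock use it. The BWT of
// "banana" is "nnbaaa" with the original string at row 3 of the sorted
// rotation matrix, so origPtr = 3 recovers "banana".
func exampleInverseBWT() []byte {
	bwt := []byte("nnbaaa")
	tt := make([]uint32, len(bwt))
	var c [256]uint
	for i, b := range bwt {
		tt[i] = uint32(b) // low 8 bits hold the BWT byte
		c[b]++            // first-pass counts, as accumulated in readBlock
	}
	tPos := inverseBWT(tt, 3, c[:])
	out := make([]byte, len(bwt))
	for i := range out {
		tPos = tt[tPos]
		out[i] = byte(tPos) // low byte is the decoded symbol
		tPos >>= 8          // high 24 bits link to the next position
	}
	return out // "banana"
}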
// This is a standard CRC32 like in hash/crc32 except that all the shifts are reversed,
// causing the bits in the input to be processed in the reverse of the usual order.
var crctab [256]uint32
func init() {
const poly = 0x04C11DB7
for i := range crctab {
crc := uint32(i) << 24
for j := 0; j < 8; j++ {
if crc&0x80000000 != 0 {
crc = (crc << 1) ^ poly
} else {
crc <<= 1
}
}
crctab[i] = crc
}
}
// updateCRC updates the crc value to incorporate the data in b.
// The initial value is 0.
func updateCRC(val uint32, b []byte) uint32 {
crc := ^val
for _, v := range b {
crc = crctab[byte(crc>>24)^v] ^ (crc << 8)
}
return ^crc
}
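// Illustrative sketch (not part of the original file): the package's single
// exported entry point in use. NewReader wraps any io.Reader, and every
// error, including StructuralError, surfaces from subsequent Read calls.
func exampleDecompress(dst io.Writer, src io.Reader) error {
	_, err := io.Copy(dst, NewReader(src))
	return err
}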

src/compress/bzip2/bzip2_test.go Normal file

@@ -0,0 +1,240 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bzip2
import (
"bytes"
"encoding/hex"
"fmt"
"io"
"os"
"testing"
)
func mustDecodeHex(s string) []byte {
b, err := hex.DecodeString(s)
if err != nil {
panic(err)
}
return b
}
func mustLoadFile(f string) []byte {
b, err := os.ReadFile(f)
if err != nil {
panic(err)
}
return b
}
func trim(b []byte) string {
const limit = 1024
if len(b) < limit {
return fmt.Sprintf("%q", b)
}
return fmt.Sprintf("%q...", b[:limit])
}
func TestReader(t *testing.T) {
var vectors = []struct {
desc string
input []byte
output []byte
fail bool
}{{
desc: "hello world",
input: mustDecodeHex("" +
"425a68393141592653594eece83600000251800010400006449080200031064c" +
"4101a7a9a580bb9431f8bb9229c28482776741b0",
),
output: []byte("hello world\n"),
}, {
desc: "concatenated files",
input: mustDecodeHex("" +
"425a68393141592653594eece83600000251800010400006449080200031064c" +
"4101a7a9a580bb9431f8bb9229c28482776741b0425a68393141592653594eec" +
"e83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb92" +
"29c28482776741b0",
),
output: []byte("hello world\nhello world\n"),
}, {
desc: "32B zeros",
input: mustDecodeHex("" +
"425a6839314159265359b5aa5098000000600040000004200021008283177245" +
"385090b5aa5098",
),
output: make([]byte, 32),
}, {
desc: "1MiB zeros",
input: mustDecodeHex("" +
"425a683931415926535938571ce50008084000c0040008200030cc0529a60806" +
"c4201e2ee48a70a12070ae39ca",
),
output: make([]byte, 1<<20),
}, {
desc: "random data",
input: mustLoadFile("testdata/pass-random1.bz2"),
output: mustLoadFile("testdata/pass-random1.bin"),
}, {
desc: "random data - full symbol range",
input: mustLoadFile("testdata/pass-random2.bz2"),
output: mustLoadFile("testdata/pass-random2.bin"),
}, {
desc: "random data - uses RLE1 stage",
input: mustDecodeHex("" +
"425a6839314159265359d992d0f60000137dfe84020310091c1e280e100e0428" +
"01099210094806c0110002e70806402000546034000034000000f28300000320" +
"00d3403264049270eb7a9280d308ca06ad28f6981bee1bf8160727c7364510d7" +
"3a1e123083421b63f031f63993a0f40051fbf177245385090d992d0f60",
),
output: mustDecodeHex("" +
"92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f871" +
"9e6c2a2bd4f5f88db07ecd0da3a33b263483db9b2c158786ad6363be35d17335" +
"ba",
),
}, {
desc: "1MiB sawtooth",
input: mustLoadFile("testdata/pass-sawtooth.bz2"),
output: func() []byte {
b := make([]byte, 1<<20)
for i := range b {
b[i] = byte(i)
}
return b
}(),
}, {
desc: "RLE2 buffer overrun - issue 5747",
input: mustLoadFile("testdata/fail-issue5747.bz2"),
fail: true,
}, {
desc: "out-of-range selector - issue 8363",
input: mustDecodeHex("" +
"425a68393141592653594eece83600000251800010400006449080200031064c" +
"4101a7a9a580bb943117724538509000000000",
),
fail: true,
}, {
desc: "bad block size - issue 13941",
input: mustDecodeHex("" +
"425a683131415926535936dc55330063ffc0006000200020a40830008b0008b8" +
"bb9229c28481b6e2a998",
),
fail: true,
}, {
desc: "bad huffman delta",
input: mustDecodeHex("" +
"425a6836314159265359b1f7404b000000400040002000217d184682ee48a70a" +
"12163ee80960",
),
fail: true,
}}
for i, v := range vectors {
rd := NewReader(bytes.NewReader(v.input))
buf, err := io.ReadAll(rd)
if fail := bool(err != nil); fail != v.fail {
if fail {
t.Errorf("test %d (%s), unexpected failure: %v", i, v.desc, err)
} else {
t.Errorf("test %d (%s), unexpected success", i, v.desc)
}
}
if !v.fail && !bytes.Equal(buf, v.output) {
t.Errorf("test %d (%s), output mismatch:\ngot %s\nwant %s", i, v.desc, trim(buf), trim(v.output))
}
}
}
func TestBitReader(t *testing.T) {
var vectors = []struct {
nbits uint // Number of bits to read
value int // Expected output value (0 for error)
fail bool // Expected operation failure?
}{
{nbits: 1, value: 1},
{nbits: 1, value: 0},
{nbits: 1, value: 1},
{nbits: 5, value: 11},
{nbits: 32, value: 0x12345678},
{nbits: 15, value: 14495},
{nbits: 3, value: 6},
{nbits: 6, value: 13},
{nbits: 1, fail: true},
}
rd := bytes.NewReader([]byte{0xab, 0x12, 0x34, 0x56, 0x78, 0x71, 0x3f, 0x8d})
br := newBitReader(rd)
for i, v := range vectors {
val := br.ReadBits(v.nbits)
if fail := bool(br.err != nil); fail != v.fail {
if fail {
t.Errorf("test %d, unexpected failure: ReadBits(%d) = %v", i, v.nbits, br.err)
} else {
t.Errorf("test %d, unexpected success: ReadBits(%d) = nil", i, v.nbits)
}
}
if !v.fail && val != v.value {
t.Errorf("test %d, mismatching value: ReadBits(%d) = %d, want %d", i, v.nbits, val, v.value)
}
}
}
func TestMTF(t *testing.T) {
var vectors = []struct {
idx int // Input index
sym uint8 // Expected output symbol
}{
{idx: 1, sym: 1}, // [1 0 2 3 4]
{idx: 0, sym: 1}, // [1 0 2 3 4]
{idx: 1, sym: 0}, // [0 1 2 3 4]
{idx: 4, sym: 4}, // [4 0 1 2 3]
{idx: 1, sym: 0}, // [0 4 1 2 3]
}
mtf := newMTFDecoderWithRange(5)
for i, v := range vectors {
sym := mtf.Decode(v.idx)
t.Log(mtf)
if sym != v.sym {
t.Errorf("test %d, symbol mismatch: Decode(%d) = %d, want %d", i, v.idx, sym, v.sym)
}
}
}
func TestZeroRead(t *testing.T) {
b := mustDecodeHex("425a6839314159265359b5aa5098000000600040000004200021008283177245385090b5aa5098")
r := NewReader(bytes.NewReader(b))
if n, err := r.Read(nil); n != 0 || err != nil {
t.Errorf("Read(nil) = (%d, %v), want (0, nil)", n, err)
}
}
var (
digits = mustLoadFile("testdata/e.txt.bz2")
newton = mustLoadFile("testdata/Isaac.Newton-Opticks.txt.bz2")
random = mustLoadFile("testdata/random.data.bz2")
)
func benchmarkDecode(b *testing.B, compressed []byte) {
// Determine the uncompressed size of testfile.
uncompressedSize, err := io.Copy(io.Discard, NewReader(bytes.NewReader(compressed)))
if err != nil {
b.Fatal(err)
}
b.SetBytes(uncompressedSize)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
r := bytes.NewReader(compressed)
io.Copy(io.Discard, NewReader(r))
}
}
func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
func BenchmarkDecodeNewton(b *testing.B) { benchmarkDecode(b, newton) }
func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) }

src/compress/bzip2/huffman.go Normal file

@@ -0,0 +1,234 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bzip2
import (
"cmp"
"slices"
)
// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
// symbol.
type huffmanTree struct {
// nodes contains all the non-leaf nodes in the tree. nodes[0] is the
// root of the tree and nextNode contains the index of the next element
// of nodes to use when the tree is being constructed.
nodes []huffmanNode
nextNode int
}
// A huffmanNode is a node in the tree. left and right contain indexes into the
// nodes slice of the tree. If left or right is invalidNodeValue then the child
// is a leaf node and its value is in leftValue/rightValue.
//
// The symbols are uint16s because bzip2 encodes not only MTF indexes in the
// tree, but also two magic values for run-length encoding and an EOF symbol.
// Thus there are more than 256 possible symbols.
type huffmanNode struct {
left, right uint16
leftValue, rightValue uint16
}
// invalidNodeValue is an invalid index which marks a leaf node in the tree.
const invalidNodeValue = 0xffff
// Decode reads bits from the given bitReader and navigates the tree until a
// symbol is found.
func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
nodeIndex := uint16(0) // node 0 is the root of the tree.
for {
node := &t.nodes[nodeIndex]
var bit uint16
if br.bits > 0 {
// Get next bit - fast path.
br.bits--
bit = uint16(br.n>>(br.bits&63)) & 1
} else {
// Get next bit - slow path.
// Use ReadBits to retrieve a single bit
// from the underlying io.ByteReader.
bit = uint16(br.ReadBits(1))
}
// Trick a compiler into generating conditional move instead of branch,
// by making both loads unconditional.
l, r := node.left, node.right
if bit == 1 {
nodeIndex = l
} else {
nodeIndex = r
}
if nodeIndex == invalidNodeValue {
// We found a leaf. Use the value of bit to decide
// whether it's a left or a right value.
l, r := node.leftValue, node.rightValue
if bit == 1 {
v = l
} else {
v = r
}
return
}
}
}
// newHuffmanTree builds a Huffman tree from a slice containing the code
// lengths of each symbol. The maximum code length is 32 bits.
func newHuffmanTree(lengths []uint8) (huffmanTree, error) {
// There are many possible trees that assign the same code length to
// each symbol (consider reflecting a tree down the middle, for
// example). Since the code length assignments determine the
// efficiency of the tree, each of these trees is equally good. In
// order to minimize the amount of information needed to build a tree
// bzip2 uses a canonical tree so that it can be reconstructed given
// only the code length assignments.
if len(lengths) < 2 {
panic("newHuffmanTree: too few symbols")
}
var t huffmanTree
// First we sort the code length assignments by ascending code length,
// using the symbol value to break ties.
pairs := make([]huffmanSymbolLengthPair, len(lengths))
for i, length := range lengths {
pairs[i].value = uint16(i)
pairs[i].length = length
}
slices.SortFunc(pairs, func(a, b huffmanSymbolLengthPair) int {
if c := cmp.Compare(a.length, b.length); c != 0 {
return c
}
return cmp.Compare(a.value, b.value)
})
// Now we assign codes to the symbols, starting with the longest code.
// We keep the codes packed into a uint32, at the most-significant end.
// So branches are taken from the MSB downwards. This makes it easy to
// sort them later.
code := uint32(0)
length := uint8(32)
codes := make([]huffmanCode, len(lengths))
for i := len(pairs) - 1; i >= 0; i-- {
if length > pairs[i].length {
length = pairs[i].length
}
codes[i].code = code
codes[i].codeLen = length
codes[i].value = pairs[i].value
// We need to 'increment' the code, which means treating |code|
// like a |length| bit number.
code += 1 << (32 - length)
}
// Now we can sort by the code so that the left half of each branch are
// grouped together, recursively.
slices.SortFunc(codes, func(a, b huffmanCode) int {
return cmp.Compare(a.code, b.code)
})
t.nodes = make([]huffmanNode, len(codes))
_, err := buildHuffmanNode(&t, codes, 0)
return t, err
}
// huffmanSymbolLengthPair contains a symbol and its code length.
type huffmanSymbolLengthPair struct {
value uint16
length uint8
}
// huffmanCode contains a symbol, its code and code length.
type huffmanCode struct {
code uint32
codeLen uint8
value uint16
}
// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
// the Huffman tree at the given level. It returns the index of the newly
// constructed node.
func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error) {
test := uint32(1) << (31 - level)
// We have to search the list of codes to find the divide between the left and right sides.
firstRightIndex := len(codes)
for i, code := range codes {
if code.code&test != 0 {
firstRightIndex = i
break
}
}
left := codes[:firstRightIndex]
right := codes[firstRightIndex:]
if len(left) == 0 || len(right) == 0 {
// There is a superfluous level in the Huffman tree indicating
// a bug in the encoder. However, this bug has been observed in
// the wild so we handle it.
// If this function was called recursively then we know that
// len(codes) >= 2 because, otherwise, we would have hit the
// "leaf node" case, below, and not recurred.
//
// However, for the initial call it's possible that len(codes)
// is zero or one. Both cases are invalid because a zero length
// tree cannot encode anything and a length-1 tree can only
// encode EOF and so is superfluous. We reject both.
if len(codes) < 2 {
return 0, StructuralError("empty Huffman tree")
}
// In this case the recursion doesn't always reduce the length
// of codes so we need to ensure termination via another
// mechanism.
if level == 31 {
// Since len(codes) >= 2 the only way that the values
// can match at all 32 bits is if they are equal, which
// is invalid. This ensures that we never enter
// infinite recursion.
return 0, StructuralError("equal symbols in Huffman tree")
}
if len(left) == 0 {
return buildHuffmanNode(t, right, level+1)
}
return buildHuffmanNode(t, left, level+1)
}
nodeIndex = uint16(t.nextNode)
node := &t.nodes[t.nextNode]
t.nextNode++
if len(left) == 1 {
// leaf node
node.left = invalidNodeValue
node.leftValue = left[0].value
} else {
node.left, err = buildHuffmanNode(t, left, level+1)
}
if err != nil {
return
}
if len(right) == 1 {
// leaf node
node.right = invalidNodeValue
node.rightValue = right[0].value
} else {
node.right, err = buildHuffmanNode(t, right, level+1)
}
return
}
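// Illustrative sketch (not part of the original file): a worked example of
// the canonical construction above. With the code lengths {2, 2, 2, 2},
// symbols 0-3 receive the canonical codes 00, 01, 10 and 11, so the byte
// 0b00011011 decodes to the symbols 0, 1, 2, 3. byteSliceReader is a
// hypothetical helper defined here only so the sketch needs no extra imports.
type byteSliceReader struct {
	b []byte
}

func (r *byteSliceReader) ReadByte() (byte, error) {
	c := r.b[0]
	r.b = r.b[1:]
	return c, nil
}

func exampleHuffmanDecode() []uint16 {
	t, err := newHuffmanTree([]uint8{2, 2, 2, 2})
	if err != nil {
		panic(err)
	}
	br := bitReader{r: &byteSliceReader{b: []byte{0b00011011}}}
	out := make([]uint16, 0, 4)
	for i := 0; i < 4; i++ {
		out = append(out, t.Decode(&br))
	}
	return out // [0 1 2 3]
}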

src/compress/bzip2/move_to_front.go Normal file

@@ -0,0 +1,53 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bzip2
// moveToFrontDecoder implements a move-to-front list. Such a list is an
// efficient way to transform a string with repeating elements into one with
// many small valued numbers, which is suitable for entropy encoding. It works
// by starting with an initial list of symbols and references symbols by their
// index into that list. When a symbol is referenced, it's moved to the front
// of the list. Thus, a repeated symbol ends up being encoded with many zeros,
// as the symbol will be at the front of the list after the first access.
type moveToFrontDecoder []byte
// newMTFDecoder creates a move-to-front decoder with an explicit initial list
// of symbols.
func newMTFDecoder(symbols []byte) moveToFrontDecoder {
if len(symbols) > 256 {
panic("too many symbols")
}
return moveToFrontDecoder(symbols)
}
// newMTFDecoderWithRange creates a move-to-front decoder with an initial
// symbol list of 0...n-1.
func newMTFDecoderWithRange(n int) moveToFrontDecoder {
if n > 256 {
panic("newMTFDecoderWithRange: cannot have > 256 symbols")
}
m := make([]byte, n)
for i := 0; i < n; i++ {
m[i] = byte(i)
}
return moveToFrontDecoder(m)
}
func (m moveToFrontDecoder) Decode(n int) (b byte) {
// Implement move-to-front with a simple copy. This approach
// beats more sophisticated approaches in benchmarking, probably
// because it has high locality of reference inside of a
// single cache line (most move-to-front operations have n < 64).
b = m[n]
copy(m[1:], m[:n])
m[0] = b
return
}
// First returns the symbol at the front of the list.
func (m moveToFrontDecoder) First() byte {
return m[0]
}
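// Illustrative sketch (not part of the original file): decoding the index
// stream {1, 0, 1, 4, 1} against the initial list [0 1 2 3 4] yields the
// symbols 1, 1, 0, 4, 0, matching the trace in TestMTF.
func exampleMTF() []byte {
	m := newMTFDecoderWithRange(5)
	out := make([]byte, 0, 5)
	for _, idx := range []int{1, 0, 1, 4, 1} {
		out = append(out, m.Decode(idx))
	}
	return out // [1 1 0 4 0]
}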

Binary testdata files not shown (src/compress/bzip2/testdata: e.txt.bz2, Isaac.Newton-Opticks.txt.bz2, random.data.bz2, and the pass-*.bz2/.bin and fail-issue5747.bz2 vectors referenced by the tests above).

src/compress/flate/deflate.go Normal file

@@ -0,0 +1,747 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"errors"
"fmt"
"io"
"math"
)
const (
NoCompression = 0
BestSpeed = 1
BestCompression = 9
DefaultCompression = -1
// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
// entropy encoding. This mode is useful in compressing data that has
// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
// that lacks an entropy encoder. Compression gains are achieved when
// certain bytes in the input stream occur more frequently than others.
//
// Note that HuffmanOnly produces a compressed output that is
// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
// continue to be able to decompress this output.
HuffmanOnly = -2
)
const (
logWindowSize = 15
windowSize = 1 << logWindowSize
windowMask = windowSize - 1
// The LZ77 step produces a sequence of literal tokens and <length, offset>
// pair tokens. The offset is also known as distance. The underlying wire
// format limits the range of lengths and offsets. For example, there are
// 256 legitimate lengths: those in the range [3, 258]. This package's
// compressor uses a higher minimum match length, enabling optimizations
// such as finding matches via 32-bit loads and compares.
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
minMatchLength = 4 // The smallest match length that the compressor actually emits
maxMatchLength = 258 // The largest match length
baseMatchOffset = 1 // The smallest match offset
maxMatchOffset = 1 << 15 // The largest match offset
// The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
maxStoreBlockSize = 65535
hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
hashMask = (1 << hashBits) - 1
maxHashOffset = 1 << 24
skipNever = math.MaxInt32
)
type compressionLevel struct {
level, good, lazy, nice, chain, fastSkipHashing int
}
var levels = []compressionLevel{
{0, 0, 0, 0, 0, 0}, // NoCompression.
{1, 0, 0, 0, 0, 0}, // BestSpeed uses a custom algorithm; see deflatefast.go.
// For levels 2-3 we don't bother trying with lazy matches.
{2, 4, 0, 16, 8, 5},
{3, 4, 0, 32, 32, 6},
// Levels 4-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
{4, 4, 4, 16, 16, skipNever},
{5, 8, 16, 32, 32, skipNever},
{6, 8, 16, 128, 128, skipNever},
{7, 8, 32, 128, 256, skipNever},
{8, 32, 128, 258, 1024, skipNever},
{9, 32, 258, 258, 4096, skipNever},
}
type compressor struct {
compressionLevel
w *huffmanBitWriter
bulkHasher func([]byte, []uint32)
// compression algorithm
fill func(*compressor, []byte) int // copy data to window
step func(*compressor) // process window
bestSpeed *deflateFast // Encoder for BestSpeed
// Input hash chains
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
// If hashHead[hashValue] is within the current window, then
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
chainHead int
hashHead [hashSize]uint32
hashPrev [windowSize]uint32
hashOffset int
// input window: unprocessed data is window[index:windowEnd]
index int
window []byte
windowEnd int
blockStart int // window index where current tokens start
byteAvailable bool // if true, still need to process window[index-1].
sync bool // requesting flush
// queued output tokens
tokens []token
// deflate state
length int
offset int
maxInsertIndex int
err error
// hashMatch must be able to contain hashes for the maximum match length.
hashMatch [maxMatchLength - 1]uint32
}
func (d *compressor) fillDeflate(b []byte) int {
if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize
copy(d.window, d.window[windowSize:2*windowSize])
d.index -= windowSize
d.windowEnd -= windowSize
if d.blockStart >= windowSize {
d.blockStart -= windowSize
} else {
d.blockStart = math.MaxInt32
}
d.hashOffset += windowSize
if d.hashOffset > maxHashOffset {
delta := d.hashOffset - 1
d.hashOffset -= delta
d.chainHead -= delta
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (Issue #18625).
for i, v := range d.hashPrev[:] {
if int(v) > delta {
d.hashPrev[i] = uint32(int(v) - delta)
} else {
d.hashPrev[i] = 0
}
}
for i, v := range d.hashHead[:] {
if int(v) > delta {
d.hashHead[i] = uint32(int(v) - delta)
} else {
d.hashHead[i] = 0
}
}
}
}
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
func (d *compressor) writeBlock(tokens []token, index int) error {
if index > 0 {
var window []byte
if d.blockStart <= index {
window = d.window[d.blockStart:index]
}
d.blockStart = index
d.w.writeBlock(tokens, false, window)
return d.w.err
}
return nil
}
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a reset.
func (d *compressor) fillWindow(b []byte) {
// Do not fill window if we are in store-only mode.
if d.compressionLevel.level < 2 {
return
}
if d.index != 0 || d.windowEnd != 0 {
panic("internal error: fillWindow called with stale data")
}
// If we are given too much, cut it.
if len(b) > windowSize {
b = b[len(b)-windowSize:]
}
// Add all to window.
n := copy(d.window, b)
// Calculate 256 hashes at a time (more L1 cache hits).
loops := (n + 256 - minMatchLength) / 256
for j := 0; j < loops; j++ {
index := j * 256
end := index + 256 + minMatchLength - 1
if end > n {
end = n
}
toCheck := d.window[index:end]
dstSize := len(toCheck) - minMatchLength + 1
if dstSize <= 0 {
continue
}
dst := d.hashMatch[:dstSize]
d.bulkHasher(toCheck, dst)
for i, val := range dst {
di := i + index
hh := &d.hashHead[val&hashMask]
// Get previous value with the same hash.
// Our chain should point to the previous value.
d.hashPrev[di&windowMask] = *hh
// Set the head of the hash chain to us.
*hh = uint32(di + d.hashOffset)
}
}
// Update window information.
d.windowEnd = n
d.index = n
}
// Try to find a match starting at pos whose length is greater than prevLength.
// We only look at d.chain possibilities before giving up.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
}
win := d.window[0 : pos+minMatchLook]
// We quit when we get a match that's at least nice long
nice := len(win) - pos
if d.nice < nice {
nice = d.nice
}
// If we've got a match that's good enough, only look in 1/4 the chain.
tries := d.chain
length = prevLength
if length >= d.good {
tries >>= 2
}
wEnd := win[pos+length]
wPos := win[pos:]
minIndex := pos - windowSize
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:], wPos, minMatchLook)
if n > length && (n > minMatchLength || pos-i <= 4096) {
length = n
offset = pos - i
ok = true
if n >= nice {
// The match is good enough that we don't try to find a better one.
break
}
wEnd = win[pos+n]
}
}
if i == minIndex {
// hashPrev[i & windowMask] has already been overwritten, so stop now.
break
}
i = int(d.hashPrev[i&windowMask]) - d.hashOffset
if i < minIndex || i < 0 {
break
}
}
return
}
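// Illustrative sketch (not part of the original file): how the hash chains
// described on the compressor struct are walked. The newest candidate
// position for the 4 bytes at pos comes from hashHead, and each older one
// from hashPrev, until the candidate falls out of the window; findMatch
// above follows exactly this chain. Assumes pos+4 <= d.windowEnd.
func (d *compressor) chainCandidates(pos, max int) []int {
	var out []int
	i := int(d.hashHead[hash4(d.window[pos:pos+4])&hashMask]) - d.hashOffset
	for i >= 0 && i > pos-windowSize && len(out) < max {
		out = append(out, i)
		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
	}
	return out
}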
func (d *compressor) writeStoredBlock(buf []byte) error {
if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
return d.w.err
}
d.w.writeBytes(buf)
return d.w.err
}
const hashmul = 0x1e35a7bd
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4.
func bulkHash4(b []byte, dst []uint32) {
if len(b) < minMatchLength {
return
}
hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
dst[0] = (hb * hashmul) >> (32 - hashBits)
end := len(b) - minMatchLength + 1
for i := 1; i < end; i++ {
hb = (hb << 8) | uint32(b[i+3])
dst[i] = (hb * hashmul) >> (32 - hashBits)
}
}
// matchLen returns the number of matching bytes in a and b
// up to length 'max'. Both slices must be at least 'max'
// bytes in size.
func matchLen(a, b []byte, max int) int {
a = a[:max]
b = b[:len(a)]
for i, av := range a {
if b[i] != av {
return i
}
}
return max
}
// encSpeed will compress and store the currently added data,
// if enough has been accumulated or we are at the end of the stream.
// Any error that occurred will be in d.err.
func (d *compressor) encSpeed() {
// We only compress if we have maxStoreBlockSize.
if d.windowEnd < maxStoreBlockSize {
if !d.sync {
return
}
// Handle small sizes.
if d.windowEnd < 128 {
switch {
case d.windowEnd == 0:
return
case d.windowEnd <= 16:
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
default:
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
d.err = d.w.err
}
d.windowEnd = 0
d.bestSpeed.reset()
return
}
}
// Encode the block.
d.tokens = d.bestSpeed.encode(d.tokens[:0], d.window[:d.windowEnd])
// If we removed less than 1/16th, Huffman compress the block.
if len(d.tokens) > d.windowEnd-(d.windowEnd>>4) {
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
} else {
d.w.writeBlockDynamic(d.tokens, false, d.window[:d.windowEnd])
}
d.err = d.w.err
d.windowEnd = 0
}
func (d *compressor) initDeflate() {
d.window = make([]byte, 2*windowSize)
d.hashOffset = 1
d.tokens = make([]token, 0, maxFlateBlockTokens+1)
d.length = minMatchLength - 1
d.offset = 0
d.byteAvailable = false
d.index = 0
d.chainHead = -1
d.bulkHasher = bulkHash4
}
func (d *compressor) deflate() {
if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
return
}
d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
Loop:
for {
if d.index > d.windowEnd {
panic("index > windowEnd")
}
lookahead := d.windowEnd - d.index
if lookahead < minMatchLength+maxMatchLength {
if !d.sync {
break Loop
}
if d.index > d.windowEnd {
panic("index > windowEnd")
}
if lookahead == 0 {
// Flush current output block if any.
if d.byteAvailable {
// There is still one pending token that needs to be flushed
d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
d.byteAvailable = false
}
if len(d.tokens) > 0 {
if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
break Loop
}
}
if d.index < d.maxInsertIndex {
// Update the hash
hash := hash4(d.window[d.index : d.index+minMatchLength])
hh := &d.hashHead[hash&hashMask]
d.chainHead = int(*hh)
d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
*hh = uint32(d.index + d.hashOffset)
}
prevLength := d.length
prevOffset := d.offset
d.length = minMatchLength - 1
d.offset = 0
minIndex := d.index - windowSize
if minIndex < 0 {
minIndex = 0
}
if d.chainHead-d.hashOffset >= minIndex &&
(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
d.length = newLength
d.offset = newOffset
}
}
if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
if d.fastSkipHashing != skipNever {
d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset)))
} else {
d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset)))
}
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
if d.length <= d.fastSkipHashing {
var newIndex int
if d.fastSkipHashing != skipNever {
newIndex = d.index + d.length
} else {
newIndex = d.index + prevLength - 1
}
index := d.index
for index++; index < newIndex; index++ {
if index < d.maxInsertIndex {
hash := hash4(d.window[index : index+minMatchLength])
// Get previous value with the same hash.
// Our chain should point to the previous value.
hh := &d.hashHead[hash&hashMask]
d.hashPrev[index&windowMask] = *hh
// Set the head of the hash chain to us.
*hh = uint32(index + d.hashOffset)
}
}
d.index = index
if d.fastSkipHashing == skipNever {
d.byteAvailable = false
d.length = minMatchLength - 1
}
} else {
// For matches this long, we don't bother inserting each individual
// item into the table.
d.index += d.length
}
if len(d.tokens) == maxFlateBlockTokens {
// The block includes the current character
if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
} else {
if d.fastSkipHashing != skipNever || d.byteAvailable {
i := d.index - 1
if d.fastSkipHashing != skipNever {
i = d.index
}
d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
if len(d.tokens) == maxFlateBlockTokens {
if d.err = d.writeBlock(d.tokens, i+1); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
}
d.index++
if d.fastSkipHashing == skipNever {
d.byteAvailable = true
}
}
}
}
func (d *compressor) fillStore(b []byte) int {
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
func (d *compressor) store() {
if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
d.windowEnd = 0
}
}
// storeHuff compresses and stores the currently added data
// when the d.window is full or we are at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeHuff() {
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
return
}
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
d.err = d.w.err
d.windowEnd = 0
}
func (d *compressor) write(b []byte) (n int, err error) {
if d.err != nil {
return 0, d.err
}
n = len(b)
for len(b) > 0 {
d.step(d)
b = b[d.fill(d, b):]
if d.err != nil {
return 0, d.err
}
}
return n, nil
}
func (d *compressor) syncFlush() error {
if d.err != nil {
return d.err
}
d.sync = true
d.step(d)
if d.err == nil {
d.w.writeStoredHeader(0, false)
d.w.flush()
d.err = d.w.err
}
d.sync = false
return d.err
}
func (d *compressor) init(w io.Writer, level int) (err error) {
d.w = newHuffmanBitWriter(w)
switch {
case level == NoCompression:
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillStore
d.step = (*compressor).store
case level == HuffmanOnly:
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillStore
d.step = (*compressor).storeHuff
case level == BestSpeed:
d.compressionLevel = levels[level]
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillStore
d.step = (*compressor).encSpeed
d.bestSpeed = newDeflateFast()
d.tokens = make([]token, maxStoreBlockSize)
case level == DefaultCompression:
level = 6
fallthrough
case 2 <= level && level <= 9:
d.compressionLevel = levels[level]
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflate
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
return nil
}
func (d *compressor) reset(w io.Writer) {
d.w.reset(w)
d.sync = false
d.err = nil
switch d.compressionLevel.level {
case NoCompression:
d.windowEnd = 0
case BestSpeed:
d.windowEnd = 0
d.tokens = d.tokens[:0]
d.bestSpeed.reset()
default:
d.chainHead = -1
for i := range d.hashHead {
d.hashHead[i] = 0
}
for i := range d.hashPrev {
d.hashPrev[i] = 0
}
d.hashOffset = 1
d.index, d.windowEnd = 0, 0
d.blockStart, d.byteAvailable = 0, false
d.tokens = d.tokens[:0]
d.length = minMatchLength - 1
d.offset = 0
d.maxInsertIndex = 0
}
}
func (d *compressor) close() error {
if d.err == errWriterClosed {
return nil
}
if d.err != nil {
return d.err
}
d.sync = true
d.step(d)
if d.err != nil {
return d.err
}
if d.w.writeStoredHeader(0, true); d.w.err != nil {
return d.w.err
}
d.w.flush()
if d.w.err != nil {
return d.w.err
}
d.err = errWriterClosed
return nil
}
// NewWriter returns a new [Writer] compressing data at the given level.
// Following zlib, levels range from 1 ([BestSpeed]) to 9 ([BestCompression]);
// higher levels typically run slower but compress more. Level 0
// ([NoCompression]) does not attempt any compression; it only adds the
// necessary DEFLATE framing.
// Level -1 ([DefaultCompression]) uses the default compression level.
// Level -2 ([HuffmanOnly]) will use Huffman compression only, giving
// a very fast compression for all types of input, but sacrificing considerable
// compression efficiency.
//
// If level is in the range [-2, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
var dw Writer
if err := dw.d.init(w, level); err != nil {
return nil, err
}
return &dw, nil
}
// NewWriterDict is like [NewWriter] but initializes the new
// [Writer] with a preset dictionary. The returned [Writer] behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a [Reader] initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
dw := &dictWriter{w}
zw, err := NewWriter(dw, level)
if err != nil {
return nil, err
}
zw.d.fillWindow(dict)
zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
return zw, err
}
type dictWriter struct {
w io.Writer
}
func (w *dictWriter) Write(b []byte) (n int, err error) {
return w.w.Write(b)
}
var errWriterClosed = errors.New("flate: closed writer")
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see [NewWriter]).
type Writer struct {
d compressor
dict []byte
}
// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
return w.d.write(data)
}
// Flush flushes any pending data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// Calling Flush when there is no pending data still causes the [Writer]
// to emit a sync marker of at least 4 bytes.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
// For more about flushing:
// https://www.bolet.org/~pornin/deflate-flush.html
return w.d.syncFlush()
}
// Close flushes and closes the writer.
func (w *Writer) Close() error {
return w.d.close()
}
// Reset discards the writer's state and makes it equivalent to
// the result of [NewWriter] or [NewWriterDict] called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
if dw, ok := w.d.w.writer.(*dictWriter); ok {
// w was created with NewWriterDict
dw.w = dst
w.d.reset(dw)
w.d.fillWindow(w.dict)
} else {
// w was created with NewWriter
w.d.reset(dst)
}
}
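// Illustrative sketch (not part of the original file): compressing a stream
// with the Writer defined above. The matching decompressor is this package's
// NewReader (defined in inflate.go): io.Copy(dst, NewReader(src)).
func exampleCompress(dst io.Writer, src io.Reader, level int) error {
	zw, err := NewWriter(dst, level) // e.g. BestSpeed, DefaultCompression, HuffmanOnly
	if err != nil {
		return err // level was outside [-2, 9]
	}
	if _, err := io.Copy(zw, src); err != nil {
		return err
	}
	return zw.Close() // emits the final DEFLATE block
}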

File diff suppressed because it is too large

src/compress/flate/deflatefast.go Normal file

@@ -0,0 +1,309 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import "math"
// This encoding algorithm, which prioritizes speed over output size, is
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
const (
tableBits = 14 // Bits used in the table.
tableSize = 1 << tableBits // Size of the table.
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
// Reset the buffer offset when reaching this.
// Offsets are stored between blocks as int32 values.
// Since the offset we are checking against is at the beginning
// of the buffer, we need to subtract the current and input
// buffer to not risk overflowing the int32.
bufferReset = math.MaxInt32 - maxStoreBlockSize*2
)
func load32(b []byte, i int32) uint32 {
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int32) uint64 {
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func hash(u uint32) uint32 {
return (u * 0x1e35a7bd) >> tableShift
}
// These constants are defined by the Snappy implementation so that its
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
// aren't necessary in the pure Go implementation, as we don't use those same
// optimizations, but using the same thresholds doesn't really hurt.
const (
inputMargin = 16 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
val uint32 // Value at destination
offset int32
}
// deflateFast maintains the table for matches,
// and the previous byte block for cross block matching.
type deflateFast struct {
table [tableSize]tableEntry
prev []byte // Previous block, zero length if unknown.
cur int32 // Current match offset.
}
func newDeflateFast() *deflateFast {
return &deflateFast{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}
}
// encode encodes a block given in src and appends tokens
// to dst and returns the result.
func (e *deflateFast) encode(dst []token, src []byte) []token {
// Ensure that e.cur doesn't wrap.
if e.cur >= bufferReset {
e.shiftOffsets()
}
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
e.cur += maxStoreBlockSize
e.prev = e.prev[:0]
return emitLiteral(dst, src)
}
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := int32(0)
s := int32(0)
cv := load32(src, s)
nextHash := hash(cv)
for {
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match
// is found, immediately go back to looking at every byte. This is a
// small loss (~5% performance, ~0.1% density) for compressible data
// due to more bookkeeping, but for non-compressible data (such as
// JPEG) it's a huge win since the compressor quickly "realizes" the
// data is incompressible and doesn't bother looking for matches
// everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip := int32(32)
nextS := s
var candidate tableEntry
for {
s = nextS
bytesBetweenHashLookups := skip >> 5
nextS = s + bytesBetweenHashLookups
skip += bytesBetweenHashLookups
if nextS > sLimit {
goto emitRemainder
}
candidate = e.table[nextHash&tableMask]
now := load32(src, nextS)
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
nextHash = hash(now)
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || cv != candidate.val {
// Out of range or not matched.
cv = now
continue
}
break
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
dst = emitLiteral(dst, src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
s += 4
t := candidate.offset - e.cur + 4
l := e.matchLen(s, t, src)
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
dst = append(dst, matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)))
s += l
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load64(src, s-1)
prevHash := hash(uint32(x))
e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
x >>= 8
currHash := hash(uint32(x))
candidate = e.table[currHash&tableMask]
e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != candidate.val {
cv = uint32(x >> 8)
nextHash = hash(cv)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
dst = emitLiteral(dst, src[nextEmit:])
}
e.cur += int32(len(src))
e.prev = e.prev[:len(src)]
copy(e.prev, src)
return dst
}
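// Illustrative sketch (not part of the original file): the probe positions
// produced by the skip heuristic above on unmatched input. The step is
// skip>>5, so it stays 1 for the first 32 probes, then grows to 2, 3, ...
// as the data keeps failing to match.
func exampleSkipSchedule(n int32) []int32 {
	var probes []int32
	skip, s := int32(32), int32(0)
	for s < n {
		probes = append(probes, s)
		step := skip >> 5
		s += step
		skip += step
	}
	return probes
}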
func emitLiteral(dst []token, lit []byte) []token {
for _, v := range lit {
dst = append(dst, literalToken(uint32(v)))
}
return dst
}
// matchLen returns the match length between src[s:] and src[t:].
// t can be negative to indicate the match is starting in e.prev.
// We assume that src[s-4:s] and src[t-4:t] already match.
func (e *deflateFast) matchLen(s, t int32, src []byte) int32 {
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// If we are inside the current block
if t >= 0 {
b := src[t:]
a := src[s:s1]
b = b[:len(a)]
// Extend the match to be as long as possible.
for i := range a {
if a[i] != b[i] {
return int32(i)
}
}
return int32(len(a))
}
// We found a match in the previous block.
tp := int32(len(e.prev)) + t
if tp < 0 {
return 0
}
// Extend the match to be as long as possible.
a := src[s:s1]
b := e.prev[tp:]
if len(b) > len(a) {
b = b[:len(a)]
}
a = a[:len(b)]
for i := range b {
if a[i] != b[i] {
return int32(i)
}
}
// If we reached our limit, we matched everything we are
// allowed to in the previous block and we return.
n := int32(len(b))
if int(s+n) == s1 {
return n
}
// Continue looking for more matches in the current block.
a = src[s+n : s1]
b = src[:len(a)]
for i := range a {
if a[i] != b[i] {
return int32(i) + n
}
}
return int32(len(a)) + n
}
// Reset resets the encoding history.
// This ensures that no matches are made to the previous block.
func (e *deflateFast) reset() {
e.prev = e.prev[:0]
// Bump the offset, so all matches will fail distance check.
// Nothing should be >= e.cur in the table.
e.cur += maxMatchOffset
// Protect against e.cur wraparound.
if e.cur >= bufferReset {
e.shiftOffsets()
}
}
// shiftOffsets will shift down all match offsets.
// This is only called in rare situations to prevent integer overflow.
//
// See https://golang.org/issue/18636 and https://github.com/golang/go/issues/34121.
func (e *deflateFast) shiftOffsets() {
if len(e.prev) == 0 {
// We have no history; just clear the table.
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset + 1
return
}
// Shift down everything in the table that isn't already too far away.
for i := range e.table[:] {
v := e.table[i].offset - e.cur + maxMatchOffset + 1
if v < 0 {
// We want to reset e.cur to maxMatchOffset + 1, so we need to shift
// all table entries down by (e.cur - (maxMatchOffset + 1)).
// Because we ignore matches > maxMatchOffset, we can cap
// any negative offsets at 0.
v = 0
}
e.table[i].offset = v
}
e.cur = maxMatchOffset + 1
}
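// Worked example (added for illustration): suppose e.cur has reached
// bufferReset and a live table entry holds offset = e.cur - 100. After
// shiftOffsets, e.cur = maxMatchOffset + 1 and the entry becomes
// offset = maxMatchOffset + 1 - 100, so the distance e.cur - offset is
// still 100 while both values stay far below the int32 overflow range.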

View File

@@ -0,0 +1,182 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
// - Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary
// is used.
//
// - Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy methods are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checking of its arguments. As such, the invariants documented for each
// method call must be respected.
type dictDecoder struct {
hist []byte // Sliding window history
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
wrPos int // Current output position in buffer
rdPos int // Have emitted hist[:rdPos] already
full bool // Has a full window length been written yet?
}
// init initializes dictDecoder to have a sliding window dictionary of the given
// size. If a preset dict is provided, it will initialize the dictionary with
// the contents of dict.
func (dd *dictDecoder) init(size int, dict []byte) {
*dd = dictDecoder{hist: dd.hist}
if cap(dd.hist) < size {
dd.hist = make([]byte, size)
}
dd.hist = dd.hist[:size]
if len(dict) > len(dd.hist) {
dict = dict[len(dict)-len(dd.hist):]
}
dd.wrPos = copy(dd.hist, dict)
if dd.wrPos == len(dd.hist) {
dd.wrPos = 0
dd.full = true
}
dd.rdPos = dd.wrPos
}
// histSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) histSize() int {
if dd.full {
return len(dd.hist)
}
return dd.wrPos
}
// availRead reports the number of bytes that can be flushed by readFlush.
func (dd *dictDecoder) availRead() int {
return dd.wrPos - dd.rdPos
}
// availWrite reports the available amount of output buffer space.
func (dd *dictDecoder) availWrite() int {
return len(dd.hist) - dd.wrPos
}
// writeSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= availWrite()
func (dd *dictDecoder) writeSlice() []byte {
return dd.hist[dd.wrPos:]
}
// writeMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= availWrite()
func (dd *dictDecoder) writeMark(cnt int) {
dd.wrPos += cnt
}
// writeByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < availWrite()
func (dd *dictDecoder) writeByte(c byte) {
dd.hist[dd.wrPos] = c
dd.wrPos++
}
// writeCopy copies a string at a given (dist, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
// Copy non-overlapping section after destination position.
//
// This section is non-overlapping in that the copy length for this section
// is always less than or equal to the backwards distance. This can occur
// if a distance refers to data that wraps around in the buffer.
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy are placed in the destination.
if srcPos < 0 {
srcPos += len(dd.hist)
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
srcPos = 0
}
// Copy possibly overlapping section before destination position.
//
// This section can overlap if the copy length for this section is larger
// than the backwards distance. This is allowed by LZ77 so that repeated
// strings can be succinctly represented using (dist, length) pairs.
// Thus, a forwards copy is performed here; that is, the bytes copied may
// depend on the resulting bytes in the destination as the copy
// progresses along. This is functionally equivalent to the following:
//
// for i := 0; i < endPos-dstPos; i++ {
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
// }
// dstPos = endPos
//
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// tryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
dstPos := dd.wrPos
endPos := dstPos + length
if dstPos < dist || endPos > len(dd.hist) {
return 0
}
dstBase := dstPos
srcPos := dstPos - dist
// Copy possibly overlapping section before destination position.
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// readFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other dictDecoder methods.
func (dd *dictDecoder) readFlush() []byte {
toRead := dd.hist[dd.rdPos:dd.wrPos]
dd.rdPos = dd.wrPos
if dd.wrPos == len(dd.hist) {
dd.wrPos, dd.rdPos = 0, 0
dd.full = true
}
return toRead
}
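// Usage sketch (added for illustration; not part of the original file).
// Under the documented invariants, a caller interleaves writes with
// readFlush whenever the window fills:
//
//	var dd dictDecoder
//	dd.init(1<<15, nil) // 32 KiB window, no preset dictionary
//	dd.writeByte('a')   // literal insertion
//	dd.writeCopy(1, 3)  // overlapping copy; emits "aaa" (run-length style)
//	if dd.availWrite() == 0 {
//		_ = dd.readFlush() // drain before writing any further
//	}
//	out := dd.readFlush() // out == []byte("aaaa")
//	_ = out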

View File

@@ -0,0 +1,139 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"strings"
"testing"
)
func TestDictDecoder(t *testing.T) {
const (
abc = "ABC\n"
fox = "The quick brown fox jumped over the lazy dog!\n"
poem = "The Road Not Taken\nRobert Frost\n" +
"\n" +
"Two roads diverged in a yellow wood,\n" +
"And sorry I could not travel both\n" +
"And be one traveler, long I stood\n" +
"And looked down one as far as I could\n" +
"To where it bent in the undergrowth;\n" +
"\n" +
"Then took the other, as just as fair,\n" +
"And having perhaps the better claim,\n" +
"Because it was grassy and wanted wear;\n" +
"Though as for that the passing there\n" +
"Had worn them really about the same,\n" +
"\n" +
"And both that morning equally lay\n" +
"In leaves no step had trodden black.\n" +
"Oh, I kept the first for another day!\n" +
"Yet knowing how way leads on to way,\n" +
"I doubted if I should ever come back.\n" +
"\n" +
"I shall be telling this with a sigh\n" +
"Somewhere ages and ages hence:\n" +
"Two roads diverged in a wood, and I-\n" +
"I took the one less traveled by,\n" +
"And that has made all the difference.\n"
)
var poemRefs = []struct {
dist int // Backward distance (0 if this is an insertion)
length int // Length of copy or insertion
}{
{0, 38}, {33, 3}, {0, 48}, {79, 3}, {0, 11}, {34, 5}, {0, 6}, {23, 7},
{0, 8}, {50, 3}, {0, 2}, {69, 3}, {34, 5}, {0, 4}, {97, 3}, {0, 4},
{43, 5}, {0, 6}, {7, 4}, {88, 7}, {0, 12}, {80, 3}, {0, 2}, {141, 4},
{0, 1}, {196, 3}, {0, 3}, {157, 3}, {0, 6}, {181, 3}, {0, 2}, {23, 3},
{77, 3}, {28, 5}, {128, 3}, {110, 4}, {70, 3}, {0, 4}, {85, 6}, {0, 2},
{182, 6}, {0, 4}, {133, 3}, {0, 7}, {47, 5}, {0, 20}, {112, 5}, {0, 1},
{58, 3}, {0, 8}, {59, 3}, {0, 4}, {173, 3}, {0, 5}, {114, 3}, {0, 4},
{92, 5}, {0, 2}, {71, 3}, {0, 2}, {76, 5}, {0, 1}, {46, 3}, {96, 4},
{130, 4}, {0, 3}, {360, 3}, {0, 3}, {178, 5}, {0, 7}, {75, 3}, {0, 3},
{45, 6}, {0, 6}, {299, 6}, {180, 3}, {70, 6}, {0, 1}, {48, 3}, {66, 4},
{0, 3}, {47, 5}, {0, 9}, {325, 3}, {0, 1}, {359, 3}, {318, 3}, {0, 2},
{199, 3}, {0, 1}, {344, 3}, {0, 3}, {248, 3}, {0, 10}, {310, 3}, {0, 3},
{93, 6}, {0, 3}, {252, 3}, {157, 4}, {0, 2}, {273, 5}, {0, 14}, {99, 4},
{0, 1}, {464, 4}, {0, 2}, {92, 4}, {495, 3}, {0, 1}, {322, 4}, {16, 4},
{0, 3}, {402, 3}, {0, 2}, {237, 4}, {0, 2}, {432, 4}, {0, 1}, {483, 5},
{0, 2}, {294, 4}, {0, 2}, {306, 3}, {113, 5}, {0, 1}, {26, 4}, {164, 3},
{488, 4}, {0, 1}, {542, 3}, {248, 6}, {0, 5}, {205, 3}, {0, 8}, {48, 3},
{449, 6}, {0, 2}, {192, 3}, {328, 4}, {9, 5}, {433, 3}, {0, 3}, {622, 25},
{615, 5}, {46, 5}, {0, 2}, {104, 3}, {475, 10}, {549, 3}, {0, 4}, {597, 8},
{314, 3}, {0, 1}, {473, 6}, {317, 5}, {0, 1}, {400, 3}, {0, 3}, {109, 3},
{151, 3}, {48, 4}, {0, 4}, {125, 3}, {108, 3}, {0, 2},
}
var got, want bytes.Buffer
var dd dictDecoder
dd.init(1<<11, nil)
var writeCopy = func(dist, length int) {
for length > 0 {
cnt := dd.tryWriteCopy(dist, length)
if cnt == 0 {
cnt = dd.writeCopy(dist, length)
}
length -= cnt
if dd.availWrite() == 0 {
got.Write(dd.readFlush())
}
}
}
var writeString = func(str string) {
for len(str) > 0 {
cnt := copy(dd.writeSlice(), str)
str = str[cnt:]
dd.writeMark(cnt)
if dd.availWrite() == 0 {
got.Write(dd.readFlush())
}
}
}
writeString(".")
want.WriteByte('.')
str := poem
for _, ref := range poemRefs {
if ref.dist == 0 {
writeString(str[:ref.length])
} else {
writeCopy(ref.dist, ref.length)
}
str = str[ref.length:]
}
want.WriteString(poem)
writeCopy(dd.histSize(), 33)
want.Write(want.Bytes()[:33])
writeString(abc)
writeCopy(len(abc), 59*len(abc))
want.WriteString(strings.Repeat(abc, 60))
writeString(fox)
writeCopy(len(fox), 9*len(fox))
want.WriteString(strings.Repeat(fox, 10))
writeString(".")
writeCopy(1, 9)
want.WriteString(strings.Repeat(".", 10))
writeString(strings.ToUpper(poem))
writeCopy(len(poem), 7*len(poem))
want.WriteString(strings.Repeat(strings.ToUpper(poem), 8))
writeCopy(dd.histSize(), 10)
want.Write(want.Bytes()[want.Len()-dd.histSize():][:10])
got.Write(dd.readFlush())
if got.String() != want.String() {
t.Errorf("final string mismatch:\ngot %q\nwant %q", got.String(), want.String())
}
}

View File

@@ -0,0 +1,243 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate_test
import (
"bytes"
"compress/flate"
"fmt"
"io"
"log"
"os"
"strings"
"sync"
)
// In performance critical applications, Reset can be used to discard the
// current compressor or decompressor state and reinitialize them quickly
// by taking advantage of previously allocated memory.
func Example_reset() {
proverbs := []string{
"Don't communicate by sharing memory, share memory by communicating.\n",
"Concurrency is not parallelism.\n",
"The bigger the interface, the weaker the abstraction.\n",
"Documentation is for users.\n",
}
var r strings.Reader
var b bytes.Buffer
buf := make([]byte, 32<<10)
zw, err := flate.NewWriter(nil, flate.DefaultCompression)
if err != nil {
log.Fatal(err)
}
zr := flate.NewReader(nil)
for _, s := range proverbs {
r.Reset(s)
b.Reset()
// Reset the compressor and encode from some input stream.
zw.Reset(&b)
if _, err := io.CopyBuffer(zw, &r, buf); err != nil {
log.Fatal(err)
}
if err := zw.Close(); err != nil {
log.Fatal(err)
}
// Reset the decompressor and decode to some output stream.
if err := zr.(flate.Resetter).Reset(&b, nil); err != nil {
log.Fatal(err)
}
if _, err := io.CopyBuffer(os.Stdout, zr, buf); err != nil {
log.Fatal(err)
}
if err := zr.Close(); err != nil {
log.Fatal(err)
}
}
// Output:
// Don't communicate by sharing memory, share memory by communicating.
// Concurrency is not parallelism.
// The bigger the interface, the weaker the abstraction.
// Documentation is for users.
}
// A preset dictionary can be used to improve the compression ratio.
// The downside to using a dictionary is that the compressor and decompressor
// must agree in advance what dictionary to use.
func Example_dictionary() {
// The dictionary is a string of bytes. When compressing some input data,
// the compressor will attempt to substitute substrings with matches found
// in the dictionary. As such, the dictionary should only contain substrings
// that are expected to be found in the actual data stream.
const dict = `<?xml version="1.0"?>` + `<book>` + `<data>` + `<meta name="` + `" content="`
// The data to compress should (but is not required to) contain frequent
// substrings that match those in the dictionary.
const data = `<?xml version="1.0"?>
<book>
<meta name="title" content="The Go Programming Language"/>
<meta name="authors" content="Alan Donovan and Brian Kernighan"/>
<meta name="published" content="2015-10-26"/>
<meta name="isbn" content="978-0134190440"/>
<data>...</data>
</book>
`
var b bytes.Buffer
// Compress the data using the specially crafted dictionary.
zw, err := flate.NewWriterDict(&b, flate.DefaultCompression, []byte(dict))
if err != nil {
log.Fatal(err)
}
if _, err := io.Copy(zw, strings.NewReader(data)); err != nil {
log.Fatal(err)
}
if err := zw.Close(); err != nil {
log.Fatal(err)
}
// The decompressor must use the same dictionary as the compressor.
// Otherwise, the input may appear as corrupted.
fmt.Println("Decompressed output using the dictionary:")
zr := flate.NewReaderDict(bytes.NewReader(b.Bytes()), []byte(dict))
if _, err := io.Copy(os.Stdout, zr); err != nil {
log.Fatal(err)
}
if err := zr.Close(); err != nil {
log.Fatal(err)
}
fmt.Println()
// Substitute all of the bytes in the dictionary with a '#' to visually
// demonstrate the approximate effectiveness of using a preset dictionary.
fmt.Println("Substrings matched by the dictionary are marked with #:")
hashDict := []byte(dict)
for i := range hashDict {
hashDict[i] = '#'
}
zr = flate.NewReaderDict(&b, hashDict)
if _, err := io.Copy(os.Stdout, zr); err != nil {
log.Fatal(err)
}
if err := zr.Close(); err != nil {
log.Fatal(err)
}
// Output:
// Decompressed output using the dictionary:
// <?xml version="1.0"?>
// <book>
// <meta name="title" content="The Go Programming Language"/>
// <meta name="authors" content="Alan Donovan and Brian Kernighan"/>
// <meta name="published" content="2015-10-26"/>
// <meta name="isbn" content="978-0134190440"/>
// <data>...</data>
// </book>
//
// Substrings matched by the dictionary are marked with #:
// #####################
// ######
// ############title###########The Go Programming Language"/#
// ############authors###########Alan Donovan and Brian Kernighan"/#
// ############published###########2015-10-26"/#
// ############isbn###########978-0134190440"/#
// ######...</#####
// </#####
}
// DEFLATE is suitable for transmitting compressed data across the network.
func Example_synchronization() {
var wg sync.WaitGroup
defer wg.Wait()
// Use io.Pipe to simulate a network connection.
// A real network application should take care to properly close the
// underlying connection.
rp, wp := io.Pipe()
// Start a goroutine to act as the transmitter.
wg.Add(1)
go func() {
defer wg.Done()
zw, err := flate.NewWriter(wp, flate.BestSpeed)
if err != nil {
log.Fatal(err)
}
b := make([]byte, 256)
for _, m := range strings.Fields("A long time ago in a galaxy far, far away...") {
// We use a simple framing format where the first byte is the
// message length, followed by the message itself.
b[0] = uint8(copy(b[1:], m))
if _, err := zw.Write(b[:1+len(m)]); err != nil {
log.Fatal(err)
}
// Flush ensures that the receiver can read all data sent so far.
if err := zw.Flush(); err != nil {
log.Fatal(err)
}
}
if err := zw.Close(); err != nil {
log.Fatal(err)
}
}()
// Start a goroutine to act as the receiver.
wg.Add(1)
go func() {
defer wg.Done()
zr := flate.NewReader(rp)
b := make([]byte, 256)
for {
// Read the message length.
// This is guaranteed to return for every corresponding
// Flush and Close on the transmitter side.
if _, err := io.ReadFull(zr, b[:1]); err != nil {
if err == io.EOF {
break // The transmitter closed the stream
}
log.Fatal(err)
}
// Read the message content.
n := int(b[0])
if _, err := io.ReadFull(zr, b[:n]); err != nil {
log.Fatal(err)
}
fmt.Printf("Received %d bytes: %s\n", n, b[:n])
}
fmt.Println()
if err := zr.Close(); err != nil {
log.Fatal(err)
}
}()
// Output:
// Received 1 bytes: A
// Received 4 bytes: long
// Received 4 bytes: time
// Received 3 bytes: ago
// Received 2 bytes: in
// Received 1 bytes: a
// Received 6 bytes: galaxy
// Received 4 bytes: far,
// Received 3 bytes: far
// Received 7 bytes: away...
}

View File

@@ -0,0 +1,352 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This test tests some internals of the flate package.
// The tests in package compress/gzip serve as the
// end-to-end test of the decompressor.
package flate
import (
"bytes"
"encoding/hex"
"io"
"strings"
"testing"
)
// The following test should not panic.
func TestIssue5915(t *testing.T) {
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8}
var h huffmanDecoder
if h.init(bits) {
t.Fatalf("Given sequence of bits is bad, and should not succeed.")
}
}
// The following test should not panic.
func TestIssue5962(t *testing.T) {
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0,
5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11}
var h huffmanDecoder
if h.init(bits) {
t.Fatalf("Given sequence of bits is bad, and should not succeed.")
}
}
// The following test should not panic.
func TestIssue6255(t *testing.T) {
bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11}
bits2 := []int{11, 13}
var h huffmanDecoder
if !h.init(bits1) {
t.Fatalf("Given sequence of bits is good and should succeed.")
}
if h.init(bits2) {
t.Fatalf("Given sequence of bits is bad and should not succeed.")
}
}
func TestInvalidEncoding(t *testing.T) {
// Initialize Huffman decoder to recognize "0".
var h huffmanDecoder
if !h.init([]int{1}) {
t.Fatal("Failed to initialize Huffman decoder")
}
// Initialize decompressor with invalid Huffman coding.
var f decompressor
f.r = bytes.NewReader([]byte{0xff})
_, err := f.huffSym(&h)
if err == nil {
t.Fatal("Should have rejected invalid bit sequence")
}
}
func TestInvalidBits(t *testing.T) {
oversubscribed := []int{1, 2, 3, 4, 4, 5}
incomplete := []int{1, 2, 4, 4}
var h huffmanDecoder
if h.init(oversubscribed) {
t.Fatal("Should reject oversubscribed bit-length set")
}
if h.init(incomplete) {
t.Fatal("Should reject incomplete bit-length set")
}
}
func TestStreams(t *testing.T) {
// To verify any of these hexstrings as valid or invalid flate streams
// according to the C zlib library, you can use the Python wrapper library:
// >>> hex_string = "010100feff11"
// >>> import zlib
// >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE
// '\x11'
testCases := []struct {
desc string // Description of the stream
stream string // Hexstring of the input DEFLATE stream
want string // Expected result. Use "fail" to expect failure
}{{
"degenerate HCLenTree",
"05e0010000000000100000000000000000000000000000000000000000000000" +
"00000000000000000004",
"fail",
}, {
"complete HCLenTree, empty HLitTree, empty HDistTree",
"05e0010400000000000000000000000000000000000000000000000000000000" +
"00000000000000000010",
"fail",
}, {
"empty HCLenTree",
"05e0010000000000000000000000000000000000000000000000000000000000" +
"00000000000000000010",
"fail",
}, {
"complete HCLenTree, complete HLitTree, empty HDistTree, use missing HDist symbol",
"000100feff000de0010400000000100000000000000000000000000000000000" +
"0000000000000000000000000000002c",
"fail",
}, {
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use missing HDist symbol",
"000100feff000de0010000000000000000000000000000000000000000000000" +
"00000000000000000610000000004070",
"fail",
}, {
"complete HCLenTree, empty HLitTree, empty HDistTree",
"05e0010400000000100400000000000000000000000000000000000000000000" +
"0000000000000000000000000008",
"fail",
}, {
"complete HCLenTree, empty HLitTree, degenerate HDistTree",
"05e0010400000000100400000000000000000000000000000000000000000000" +
"0000000000000000000800000008",
"fail",
}, {
"complete HCLenTree, degenerate HLitTree, degenerate HDistTree, use missing HLit symbol",
"05e0010400000000100000000000000000000000000000000000000000000000" +
"0000000000000000001c",
"fail",
}, {
"complete HCLenTree, complete HLitTree, too large HDistTree",
"edff870500000000200400000000000000000000000000000000000000000000" +
"000000000000000000080000000000000004",
"fail",
}, {
"complete HCLenTree, complete HLitTree, empty HDistTree, excessive repeater code",
"edfd870500000000200400000000000000000000000000000000000000000000" +
"000000000000000000e8b100",
"fail",
}, {
"complete HCLenTree, complete HLitTree, empty HDistTree of normal length 30",
"05fd01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
"ffffffffffffffffff07000000fe01",
"",
}, {
"complete HCLenTree, complete HLitTree, empty HDistTree of excessive length 31",
"05fe01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
"ffffffffffffffffff07000000fc03",
"fail",
}, {
"complete HCLenTree, over-subscribed HLitTree, empty HDistTree",
"05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" +
"ffffffffffffffffff07f00f",
"fail",
}, {
"complete HCLenTree, under-subscribed HLitTree, empty HDistTree",
"05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" +
"fffffffffcffffffff07f00f",
"fail",
}, {
"complete HCLenTree, complete HLitTree with single code, empty HDistTree",
"05e001240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
"ffffffffffffffffff07f00f",
"01",
}, {
"complete HCLenTree, complete HLitTree with multiple codes, empty HDistTree",
"05e301240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
"ffffffffffffffffff07807f",
"01",
}, {
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HDist symbol",
"000100feff000de0010400000000100000000000000000000000000000000000" +
"0000000000000000000000000000003c",
"00000000",
}, {
"complete HCLenTree, degenerate HLitTree, degenerate HDistTree",
"05e0010400000000100000000000000000000000000000000000000000000000" +
"0000000000000000000c",
"",
}, {
"complete HCLenTree, degenerate HLitTree, empty HDistTree",
"05e0010400000000100000000000000000000000000000000000000000000000" +
"00000000000000000004",
"",
}, {
"complete HCLenTree, complete HLitTree, empty HDistTree, spanning repeater code",
"edfd870500000000200400000000000000000000000000000000000000000000" +
"000000000000000000e8b000",
"",
}, {
"complete HCLenTree with length codes, complete HLitTree, empty HDistTree",
"ede0010400000000100000000000000000000000000000000000000000000000" +
"0000000000000000000400004000",
"",
}, {
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit symbol 284 with count 31",
"000100feff00ede0010400000000100000000000000000000000000000000000" +
"000000000000000000000000000000040000407f00",
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"0000000000000000000000000000000000000000000000000000000000000000" +
"000000",
}, {
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit and HDist symbols",
"0cc2010d00000082b0ac4aff0eb07d27060000ffff",
"616263616263",
}, {
"fixed block, use reserved symbol 287",
"33180700",
"fail",
}, {
"raw block",
"010100feff11",
"11",
}, {
"issue 10426 - over-subscribed HCLenTree causes a hang",
"344c4a4e494d4b070000ff2e2eff2e2e2e2e2eff",
"fail",
}, {
"issue 11030 - empty HDistTree unexpectedly leads to error",
"05c0070600000080400fff37a0ca",
"",
}, {
"issue 11033 - empty HDistTree unexpectedly leads to error",
"050fb109c020cca5d017dcbca044881ee1034ec149c8980bbc413c2ab35be9dc" +
"b1473449922449922411202306ee97b0383a521b4ffdcf3217f9f7d3adb701",
"3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" +
"c6d7",
}}
for i, tc := range testCases {
data, err := hex.DecodeString(tc.stream)
if err != nil {
t.Fatal(err)
}
data, err = io.ReadAll(NewReader(bytes.NewReader(data)))
if tc.want == "fail" {
if err == nil {
t.Errorf("#%d (%s): got nil error, want non-nil", i, tc.desc)
}
} else {
if err != nil {
t.Errorf("#%d (%s): %v", i, tc.desc, err)
continue
}
if got := hex.EncodeToString(data); got != tc.want {
t.Errorf("#%d (%s):\ngot %q\nwant %q", i, tc.desc, got, tc.want)
}
}
}
}
func TestTruncatedStreams(t *testing.T) {
const data = "\x00\f\x00\xf3\xffhello, world\x01\x00\x00\xff\xff"
for i := 0; i < len(data)-1; i++ {
r := NewReader(strings.NewReader(data[:i]))
_, err := io.Copy(io.Discard, r)
if err != io.ErrUnexpectedEOF {
t.Errorf("io.Copy(%d) on truncated stream: got %v, want %v", i, err, io.ErrUnexpectedEOF)
}
}
}
// Verify that flate.Reader.Read returns (n, io.EOF) instead
// of (n, nil) + (0, io.EOF) when possible.
//
// This helps net/http.Transport reuse HTTP/1 connections more
// aggressively.
//
// See https://github.com/google/go-github/pull/317 for background.
func TestReaderEarlyEOF(t *testing.T) {
t.Parallel()
testSizes := []int{
1, 2, 3, 4, 5, 6, 7, 8,
100, 1000, 10000, 100000,
128, 1024, 16384, 131072,
// Testing multiples of windowSize triggers the case
// where Read will fail to return an early io.EOF.
windowSize * 1, windowSize * 2, windowSize * 3,
}
var maxSize int
for _, n := range testSizes {
if maxSize < n {
maxSize = n
}
}
readBuf := make([]byte, 40)
data := make([]byte, maxSize)
for i := range data {
data[i] = byte(i)
}
for _, sz := range testSizes {
if testing.Short() && sz > windowSize {
continue
}
for _, flush := range []bool{true, false} {
earlyEOF := true // Do we expect early io.EOF?
var buf bytes.Buffer
w, _ := NewWriter(&buf, 5)
w.Write(data[:sz])
if flush {
// If a Flush occurs after all the actual data, the flushing
// semantics dictate that we will observe a (0, io.EOF) since
// Read must return data before it knows that the stream ended.
w.Flush()
earlyEOF = false
}
w.Close()
r := NewReader(&buf)
for {
n, err := r.Read(readBuf)
if err == io.EOF {
// If the availWrite == windowSize, then that means that the
// previous Read returned because the write buffer was full
// and it just so happened that the stream had no more data.
// This situation is rare, but unavoidable.
if r.(*decompressor).dict.availWrite() == windowSize {
earlyEOF = false
}
if n == 0 && earlyEOF {
t.Errorf("On size:%d flush:%v, Read() = (0, io.EOF), want (n, io.EOF)", sz, flush)
}
if n != 0 && !earlyEOF {
t.Errorf("On size:%d flush:%v, Read() = (%d, io.EOF), want (0, io.EOF)", sz, flush, n)
}
break
}
if err != nil {
t.Fatal(err)
}
}
}
}
}

View File

@@ -0,0 +1,701 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"io"
)
const (
// The largest offset code.
offsetCodeCount = 30
// The special code used to mark the end of a block.
endBlockMarker = 256
// The first length code.
lengthCodesStart = 257
// The number of codegen codes.
codegenCodeCount = 19
badCode = 255
// bufferFlushSize indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
bufferFlushSize = 240
// bufferSize is the actual output byte buffer size.
// It must have additional headroom for a flush
// which can contain up to 8 bytes.
bufferSize = bufferFlushSize + 8
)
// The number of extra bits needed by length code X - lengthCodesStart.
var lengthExtraBits = []int8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
/* 280 */ 4, 5, 5, 5, 5, 0,
}
// The length indicated by length code X - lengthCodesStart.
var lengthBase = []uint32{
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
64, 80, 96, 112, 128, 160, 192, 224, 255,
}
// offset code word extra bits.
var offsetExtraBits = []int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
}
var offsetBase = []uint32{
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
}
// The odd order in which the codegen code sizes are written.
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
type huffmanBitWriter struct {
// writer is the underlying writer.
// Do not use it directly; use the write method, which ensures
// that Write errors are sticky.
writer io.Writer
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits. Data is always written
// sequentially into the bytes array.
bits uint64
nbits uint
bytes [bufferSize]byte
codegenFreq [codegenCodeCount]int32
nbytes int
literalFreq []int32
offsetFreq []int32
codegen []uint8
literalEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
}
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalFreq: make([]int32, maxNumLit),
offsetFreq: make([]int32, offsetCodeCount),
codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
literalEncoding: newHuffmanEncoder(maxNumLit),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
}
func (w *huffmanBitWriter) reset(writer io.Writer) {
w.writer = writer
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
}
func (w *huffmanBitWriter) flush() {
if w.err != nil {
w.nbits = 0
return
}
n := w.nbytes
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
if w.nbits > 8 { // Avoid underflow
w.nbits -= 8
} else {
w.nbits = 0
}
n++
}
w.bits = 0
w.write(w.bytes[:n])
w.nbytes = 0
}
func (w *huffmanBitWriter) write(b []byte) {
if w.err != nil {
return
}
_, w.err = w.writer.Write(b)
}
func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
if w.err != nil {
return
}
w.bits |= uint64(b) << w.nbits
w.nbits += nb
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
bytes := w.bytes[n : n+6]
bytes[0] = byte(bits)
bytes[1] = byte(bits >> 8)
bytes[2] = byte(bits >> 16)
bytes[3] = byte(bits >> 24)
bytes[4] = byte(bits >> 32)
bytes[5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
}
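// Note (added for illustration): writeBits leaves nbits below 48 on
// return, and no caller in this file passes more than 16 bits at once,
// so the 64-bit accumulator w.bits cannot overflow (47 + 16 = 63 bits
// at most before the next flush).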
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
if w.err != nil {
return
}
n := w.nbytes
if w.nbits&7 != 0 {
w.err = InternalError("writeBytes with unfinished bits")
return
}
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
w.nbits -= 8
n++
}
if n != 0 {
w.write(w.bytes[:n])
}
w.nbytes = 0
w.write(bytes)
}
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and offset lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code is written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker.
//
// numLiterals The number of literals in literalEncoding
// numOffsets The number of offsets in offsetEncoding
// litEnc, offEnc The literal and offset encoders to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
for i := range w.codegenFreq {
w.codegenFreq[i] = 0
}
// Note that we are using codegen both as a temporary variable for holding
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
codegen := w.codegen // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
cgnl[i] = uint8(litEnc.codes[i].len)
}
cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
cgnl[i] = uint8(offEnc.codes[i].len)
}
codegen[numLiterals+numOffsets] = badCode
size := codegen[0]
count := 1
outIndex := 0
for inIndex := 1; size != badCode; inIndex++ {
// INVARIANT: We have seen "count" copies of size that have not yet
// had output generated for them.
nextSize := codegen[inIndex]
if nextSize == size {
count++
continue
}
// We need to generate codegen indicating "count" of size.
if size != 0 {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
count--
for count >= 3 {
n := 6
if n > count {
n = count
}
codegen[outIndex] = 16
outIndex++
codegen[outIndex] = uint8(n - 3)
outIndex++
w.codegenFreq[16]++
count -= n
}
} else {
for count >= 11 {
n := 138
if n > count {
n = count
}
codegen[outIndex] = 18
outIndex++
codegen[outIndex] = uint8(n - 11)
outIndex++
w.codegenFreq[18]++
count -= n
}
if count >= 3 {
// count >= 3 && count <= 10
codegen[outIndex] = 17
outIndex++
codegen[outIndex] = uint8(count - 3)
outIndex++
w.codegenFreq[17]++
count = 0
}
}
count--
for ; count >= 0; count-- {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
}
// Set up invariant for next time through the loop.
size = nextSize
count = 1
}
// Marker indicating the end of the codegen.
codegen[outIndex] = badCode
}
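// Worked example (added for illustration): the code-length sequence
// {4, 4, 4, 4, 4} followed by thirteen zeros is emitted as
// {4, 16, 1, 18, 2}: a literal 4, then code 16 with extra value 1
// ("repeat the previous length 3+1 = 4 times"), then code 18 with extra
// value 2 ("repeat zero 11+2 = 13 times").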
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int(w.codegenFreq[16])*2 +
int(w.codegenFreq[17])*3 +
int(w.codegenFreq[18])*7
size = header +
litEnc.bitLength(w.literalFreq) +
offEnc.bitLength(w.offsetFreq) +
extraBits
return size, numCodegens
}
// fixedSize returns the size of fixed Huffman encoded data in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
return 3 +
fixedLiteralEncoding.bitLength(w.literalFreq) +
fixedOffsetEncoding.bitLength(w.offsetFreq) +
extraBits
}
// storedSize calculates the stored size, including header.
// The function returns the size in bits and whether the input
// fits inside a single stored block.
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
if in == nil {
return 0, false
}
if len(in) <= maxStoreBlockSize {
return (len(in) + 5) * 8, true
}
return 0, false
}
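// Note (added): the 5 bytes of overhead counted above are the block
// header byte (the 3 header bits, padded to a byte boundary by the flush
// in writeStoredHeader) plus the 16-bit LEN and 16-bit ~LEN fields.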
func (w *huffmanBitWriter) writeCode(c hcode) {
if w.err != nil {
return
}
w.bits |= uint64(c.code) << w.nbits
w.nbits += uint(c.len)
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
bytes := w.bytes[n : n+6]
bytes[0] = byte(bits)
bytes[1] = byte(bits >> 8)
bytes[2] = byte(bits >> 16)
bytes[3] = byte(bits >> 24)
bytes[4] = byte(bits >> 32)
bytes[5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
}
// Write the header of a dynamic Huffman block to the output stream.
//
// numLiterals The number of literals specified in codegen
// numOffsets The number of offsets specified in codegen
// numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil {
return
}
var firstBits int32 = 4
if isEof {
firstBits = 5
}
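// Note (added for illustration): firstBits packs BFINAL into the low bit
// and BTYPE = 2 (dynamic Huffman) into the next two bits, so 4 = 0b100 is
// a non-final dynamic block and 5 = 0b101 a final one. writeBits emits the
// low bit first, matching RFC 1951 bit ordering.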
w.writeBits(firstBits, 3)
w.writeBits(int32(numLiterals-257), 5)
w.writeBits(int32(numOffsets-1), 5)
w.writeBits(int32(numCodegens-4), 4)
for i := 0; i < numCodegens; i++ {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
w.writeBits(int32(value), 3)
}
i := 0
for {
var codeWord int = int(w.codegen[i])
i++
if codeWord == badCode {
break
}
w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
switch codeWord {
case 16:
w.writeBits(int32(w.codegen[i]), 2)
i++
case 17:
w.writeBits(int32(w.codegen[i]), 3)
i++
case 18:
w.writeBits(int32(w.codegen[i]), 7)
i++
}
}
}
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
}
var flag int32
if isEof {
flag = 1
}
w.writeBits(flag, 3)
w.flush()
w.writeBits(int32(length), 16)
w.writeBits(int32(^uint16(length)), 16)
}
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
if w.err != nil {
return
}
// Indicate that we are a fixed Huffman block
var value int32 = 2
if isEof {
value = 3
}
w.writeBits(value, 3)
}
// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
if w.err != nil {
return
}
tokens = append(tokens, endBlockMarker)
numLiterals, numOffsets := w.indexTokens(tokens)
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
// We only bother calculating the costs of the extra bits required by
// the length of offset fields (which will be the same for both fixed
// and dynamic encoding), if we need to compare those two encodings
// against stored encoding.
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
// First eight length codes have extra size = 0.
extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
}
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
// First four offset codes have extra size = 0.
extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
}
}
// Figure out smallest code.
// Fixed Huffman baseline.
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
var size = w.fixedSize(extraBits)
// Dynamic Huffman?
var numCodegens int
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
if dynamicSize < size {
size = dynamicSize
literalEncoding = w.literalEncoding
offsetEncoding = w.offsetEncoding
}
// Stored bytes?
if storable && storedSize < size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Huffman.
if literalEncoding == fixedLiteralEncoding {
w.writeFixedHeader(eof)
} else {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
}
// Write the tokens.
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
}
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
if w.err != nil {
return
}
tokens = append(tokens, endBlockMarker)
numLiterals, numOffsets := w.indexTokens(tokens)
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Write Huffman table.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
// Write the tokens.
w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
}
// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
for i := range w.literalFreq {
w.literalFreq[i] = 0
}
for i := range w.offsetFreq {
w.offsetFreq[i] = 0
}
for _, t := range tokens {
if t < matchType {
w.literalFreq[t.literal()]++
continue
}
length := t.length()
offset := t.offset()
w.literalFreq[lengthCodesStart+lengthCode(length)]++
w.offsetFreq[offsetCode(offset)]++
}
// get the number of literals
numLiterals = len(w.literalFreq)
for w.literalFreq[numLiterals-1] == 0 {
numLiterals--
}
// get the number of offsets
numOffsets = len(w.offsetFreq)
for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
numOffsets--
}
if numOffsets == 0 {
// We haven't found a single match. If we want to go with the dynamic encoding,
// we should count at least one offset to be sure that the offset huffman tree could be encoded.
w.offsetFreq[0] = 1
numOffsets = 1
}
w.literalEncoding.generate(w.literalFreq, 15)
w.offsetEncoding.generate(w.offsetFreq, 15)
return
}
// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
if w.err != nil {
return
}
for _, t := range tokens {
if t < matchType {
w.writeCode(leCodes[t.literal()])
continue
}
// Write the length
length := t.length()
lengthCode := lengthCode(length)
w.writeCode(leCodes[lengthCode+lengthCodesStart])
extraLengthBits := uint(lengthExtraBits[lengthCode])
if extraLengthBits > 0 {
extraLength := int32(length - lengthBase[lengthCode])
w.writeBits(extraLength, extraLengthBits)
}
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
w.writeCode(oeCodes[offsetCode])
extraOffsetBits := uint(offsetExtraBits[offsetCode])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode])
w.writeBits(extraOffset, extraOffsetBits)
}
}
}
// huffOffset is a static offset encoder used for huffman only encoding.
// It can be reused since we will not be encoding offset values.
var huffOffset *huffmanEncoder
func init() {
offsetFreq := make([]int32, offsetCodeCount)
offsetFreq[0] = 1
huffOffset = newHuffmanEncoder(offsetCodeCount)
huffOffset.generate(offsetFreq, 15)
}
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// result gains very little from compression.
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
if w.err != nil {
return
}
// Clear histogram
for i := range w.literalFreq {
w.literalFreq[i] = 0
}
// Add everything as literals
histogram(input, w.literalFreq)
w.literalFreq[endBlockMarker] = 1
const numLiterals = endBlockMarker + 1
w.offsetFreq[0] = 1
const numOffsets = 1
w.literalEncoding.generate(w.literalFreq, 15)
// Figure out smallest code.
// Always use dynamic Huffman or Store
var numCodegens int
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Huffman.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
encoding := w.literalEncoding.codes[:257]
n := w.nbytes
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << w.nbits
w.nbits += uint(c.len)
if w.nbits < 48 {
continue
}
// Store 6 bytes
bits := w.bits
w.bits >>= 48
w.nbits -= 48
bytes := w.bytes[n : n+6]
bytes[0] = byte(bits)
bytes[1] = byte(bits >> 8)
bytes[2] = byte(bits >> 16)
bytes[3] = byte(bits >> 24)
bytes[4] = byte(bits >> 32)
bytes[5] = byte(bits >> 40)
n += 6
if n < bufferFlushSize {
continue
}
w.write(w.bytes[:n])
if w.err != nil {
return // Return early in the event of write failures
}
n = 0
}
w.nbytes = n
w.writeCode(encoding[endBlockMarker])
}
// histogram accumulates a histogram of b in h.
//
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogram(b []byte, h []int32) {
h = h[:256]
for _, t := range b {
h[t]++
}
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,345 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math"
"math/bits"
"sort"
)
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
}
type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
lns byLiteral // stored to avoid repeated allocation in generate
lfs byFreq // stored to avoid repeated allocation in generate
}
type literalNode struct {
literal uint16
freq int32
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level, kept for better printing.
level int32
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
nextPairFreq int32
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
h.len = length
h.code = code
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
func newHuffmanEncoder(size int) *huffmanEncoder {
return &huffmanEncoder{codes: make([]hcode, size)}
}
// Generates a HuffmanCode corresponding to the fixed literal table.
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(maxNumLit)
codes := h.codes
var ch uint16
for ch = 0; ch < maxNumLit; ch++ {
var bits uint16
var size uint16
switch {
case ch < 144:
// size 8, 00110000 .. 10111111
bits = ch + 48
size = 8
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
size = 8
}
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
}
return h
}
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []int32) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len)
}
}
return total
}
const maxBitsLimit = 16
// bitCounts computes the number of literals assigned to each bit size in the Huffman encoding.
// It is only called when len(list) >= 3.
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list is an array of the literals with non-zero frequencies
// and their associated frequencies. The array is in order of increasing
// frequency and has as its last element a special element with frequency
// MaxInt32.
//
// maxBits is the maximum number of bits that should be used to encode any literal.
// It must be less than 16.
//
// bitCounts returns an integer slice in which slice[i] indicates the number of literals
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases.
if maxBits > n-1 {
maxBits = n - 1
}
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
lastFreq: list[1].freq,
nextCharFreq: list[2].freq,
nextPairFreq: list[0].freq + list[1].freq,
}
leafCounts[level][level] = 2
if level == 1 {
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
level := maxBits
for {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.needed = 0
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same as in the previous node.
leafCounts[level][level] = n
l.nextCharFreq = list[n].freq
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
copy(leafCounts[level][:level], leafCounts[level-1][:level])
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if l.level == maxBits {
// All done!
break
}
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for levels[level-1].needed > 0 {
level--
}
}
}
// Something is wrong if, at the end, the top level is null or hasn't used
// all of the leaves.
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
}
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 section 3.2.2.
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
code := uint16(0)
for n, bits := range bitCount {
code <<= 1
if n == 0 || bits == 0 {
continue
}
// The literals list[len(list)-bits] .. list[len(list)-1]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
h.lns.sort(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
code++
}
list = list[0 : len(list)-int(bits)]
}
}
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq is an array of frequencies, in which freq[i] gives the frequency of literal i.
// maxBits is the maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
// The largest of these is maxNumLit, so we allocate for that case.
h.freqcache = make([]literalNode, maxNumLit+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
for i, f := range freq {
if f != 0 {
list[count] = literalNode{uint16(i), f}
count++
} else {
h.codes[i].len = 0
}
}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codes[node.literal].set(uint16(i), 1)
}
return
}
h.lfs.sort(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
// And do the assignment
h.assignEncodingAndSize(bitCount, list)
}
type byLiteral []literalNode
func (s *byLiteral) sort(a []literalNode) {
*s = byLiteral(a)
sort.Sort(s)
}
func (s byLiteral) Len() int { return len(s) }
func (s byLiteral) Less(i, j int) bool {
return s[i].literal < s[j].literal
}
func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
type byFreq []literalNode
func (s *byFreq) sort(a []literalNode) {
*s = byFreq(a)
sort.Sort(s)
}
func (s byFreq) Len() int { return len(s) }
func (s byFreq) Less(i, j int) bool {
if s[i].freq == s[j].freq {
return s[i].literal < s[j].literal
}
return s[i].freq < s[j].freq
}
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func reverseBits(number uint16, bitLength byte) uint16 {
return bits.Reverse16(number << (16 - bitLength))
}
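// Example (added for illustration): the fixed code for literal 0 is the
// 8-bit pattern 00110000 (decimal 48). reverseBits(48, 8) yields 00001100
// (decimal 12), so writeBits, which emits the least-significant bit first,
// puts the Huffman code on the wire most-significant bit first, as
// RFC 1951 requires for Huffman codes.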

View File

@@ -0,0 +1,836 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate
import (
"bufio"
"io"
"math/bits"
"strconv"
"sync"
)
const (
maxCodeLen = 16 // max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
)
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError int64
func (e CorruptInputError) Error() string {
return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
}
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Read
}
func (e *ReadError) Error() string {
return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Write
}
func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
// to switch to a new underlying [Reader]. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
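// For example, a symbol with value 40 and a 7-bit code is stored as the chunk
// 40<<huffmanValueShift | 7 == 0x287: chunk&huffmanCountMask recovers the
// code length (7) and chunk>>huffmanValueShift recovers the symbol (40).
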
type huffmanDecoder struct {
min int // the minimum code length
chunks [huffmanNumChunks]uint32 // chunks as described above
links [][]uint32 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.min != 0 {
*h = huffmanDecoder{}
}
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n]++
}
// Empty tree. The decompressor.huffSym function will fail later if the tree
// is used. Technically, an empty tree is only valid for the HDIST tree and
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
// is guaranteed to fail since it will attempt to use the tree to decode the
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
// guaranteed to fail later since the compressed data section must be
// composed of at least one symbol (the end-of-block marker).
if max == 0 {
return true
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
code <<= 1
nextcode[i] = code
code += count[i]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
return false
}
h.min = min
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
h.links[off] = make([]uint32, numLinks)
}
}
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint32(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
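// As a worked example of the canonical numbering above, code lengths
// {1, 2, 3, 3} for symbols 0..3 give count[1]=1, count[2]=1, count[3]=2 and
// nextcode[1]=0b0, nextcode[2]=0b10, nextcode[3]=0b110; the completeness
// check passes because the final code value is 0b110+2 == 1<<3. The loops
// above then assign symbol 0 the code 0, symbol 1 the code 10, symbol 2 the
// code 110 and symbol 3 the code 111 (MSB first, before bit reversal).
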
// The actual read interface needed by [NewReader].
// If the passed-in io.Reader does not also have ReadByte,
// [NewReader] will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}
// Decompress state.
type decompressor struct {
// Input source.
r Reader
rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
roffset int64
// Input bits, in top of b.
b uint32
nb uint
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
dict dictDecoder
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
stepState int
final bool
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
}
func (f *decompressor) nextBlock() {
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlock()
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlock()
default:
// 3 is reserved.
f.err = CorruptInputError(f.roffset)
}
}
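// A block header is three bits, read LSB first: BFINAL, then a two-bit
// BTYPE. For instance, a final fixed-Huffman block begins with bit 1
// (final) followed by 01 (type 1), so above f.b&1 == 1 and, after the
// first shift, f.b&3 == 1.
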
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
if len(f.toRead) == 0 {
return n, f.err
}
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.step(f)
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
return CorruptInputError(f.roffset)
}
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
return err
}
}
rep += int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
if i+rep > n {
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the min bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
}
return nil
}
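// For example, with header bits HLIT=0x11, HDIST=0x1d and HCLEN=0xb, the
// stream carries nlit = 0x11+257 = 274 literal/length code lengths and
// ndist = 0x1d+1 = 30 distance code lengths, described by (0xb+4)*3 = 45
// bits of code-length-code lengths read in codeOrder order.
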
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, the fixed
// distance encoding associated with fixed Huffman blocks is used.
func (f *decompressor) huffmanBlock() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
v, err := f.huffSym(f.hl)
if err != nil {
f.err = err
return
}
var n uint // number of bits extra
var length int
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
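// As a worked example of the decoding above: symbol v=266 selects
// length = 266*2 - (265*2 - 11) = 13 with n=1 extra bit, i.e. lengths 13-14,
// matching RFC 1951 section 3.2.5. A distance symbol of 4 gives
// nb = (4-2)>>1 = 1 and a high bit of (4&1)<<1 = 0, so after one extra bit
// dist = 1<<2 + 1 + extra, i.e. distances 5-6.
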
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
f.toRead = f.dict.readFlush()
f.finishBlock()
return
}
f.copyLen = n
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into the
// history window (f.dict). It pauses for reads when the window is full.
func (f *decompressor) copyData() {
buf := f.dict.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
return
}
f.finishBlock()
}
func (f *decompressor) finishBlock() {
if f.final {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
if e == io.EOF {
return io.ErrUnexpectedEOF
}
return e
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with a single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// to satisfy the n == 0 check below.
n := uint(h.min)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b = b >> (n & 31)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
}
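// For example, with huffmanChunkBits == 9 a 12-bit code is resolved in two
// steps: the low 9 bits of b index h.chunks, where the stored count of
// huffmanChunkBits+1 marks the entry as indirect and its value selects a
// link table; bits 9-11 of b then index that table to yield the real
// symbol and code length.
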
func (f *decompressor) makeReader(r io.Reader) {
if rr, ok := r.(Reader); ok {
f.rBuf = nil
f.r = rr
return
}
// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
if f.rBuf != nil {
f.rBuf.Reset(r)
} else {
// bufio.NewReader will not return r itself: r does not implement
// flate.Reader, so it cannot be a *bufio.Reader (which does).
f.rBuf = bufio.NewReader(r)
}
f.r = f.rBuf
}
func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
fixedHuffmanDecoder.init(bits[:])
})
}
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
rBuf: f.rBuf,
bits: f.bits,
codebits: f.codebits,
dict: f.dict,
step: (*decompressor).nextBlock,
}
f.makeReader(r)
f.dict.init(maxMatchOffset, dict)
return nil
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement [io.ByteReader],
// the decompressor may read more data than necessary from r.
// The reader returns [io.EOF] after the final block in the DEFLATE stream has
// been encountered. Any trailing data after the final block is ignored.
//
// The [io.ReadCloser] returned by NewReader also implements [Resetter].
func NewReader(r io.Reader) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
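// A minimal usage sketch (not part of this file; inflate is a hypothetical
// helper): decompress an in-memory raw DEFLATE stream with the exported API.
//
//	func inflate(data []byte) ([]byte, error) {
//		r := NewReader(bytes.NewReader(data))
//		defer r.Close()
//		return io.ReadAll(r)
//	}
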
// NewReaderDict is like [NewReader] but initializes the reader
// with a preset dictionary. The returned [Reader] behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReaderDict also implements [Resetter].
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}
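// A round-trip sketch (hypothetical names): data written through a Writer
// from NewWriterDict(w, level, dict) must be read back with the identical
// dictionary, e.g.
//
//	zr := NewReaderDict(bytes.NewReader(compressed), dict)
//	defer zr.Close()
//	plain, err := io.ReadAll(zr)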

View File

@@ -0,0 +1,137 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bufio"
"bytes"
"io"
"strings"
"testing"
)
func TestReset(t *testing.T) {
ss := []string{
"lorem ipsum izzle fo rizzle",
"the quick brown fox jumped over",
}
deflated := make([]bytes.Buffer, 2)
for i, s := range ss {
w, _ := NewWriter(&deflated[i], 1)
w.Write([]byte(s))
w.Close()
}
inflated := make([]bytes.Buffer, 2)
f := NewReader(&deflated[0])
io.Copy(&inflated[0], f)
f.(Resetter).Reset(&deflated[1], nil)
io.Copy(&inflated[1], f)
f.Close()
for i, s := range ss {
if s != inflated[i].String() {
t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s)
}
}
}
func TestReaderTruncated(t *testing.T) {
vectors := []struct{ input, output string }{
{"\x00", ""},
{"\x00\f", ""},
{"\x00\f\x00", ""},
{"\x00\f\x00\xf3\xff", ""},
{"\x00\f\x00\xf3\xffhello", "hello"},
{"\x00\f\x00\xf3\xffhello, world", "hello, world"},
{"\x02", ""},
{"\xf2H\xcd", "He"},
{"\xf2H͙0a\u0084\t", "Hel\x90\x90\x90\x90\x90"},
{"\xf2H͙0a\u0084\t\x00", "Hel\x90\x90\x90\x90\x90"},
}
for i, v := range vectors {
r := strings.NewReader(v.input)
zr := NewReader(r)
b, err := io.ReadAll(zr)
if err != io.ErrUnexpectedEOF {
t.Errorf("test %d, error mismatch: got %v, want io.ErrUnexpectedEOF", i, err)
}
if string(b) != v.output {
t.Errorf("test %d, output mismatch: got %q, want %q", i, b, v.output)
}
}
}
func TestResetDict(t *testing.T) {
dict := []byte("the lorem fox")
ss := []string{
"lorem ipsum izzle fo rizzle",
"the quick brown fox jumped over",
}
deflated := make([]bytes.Buffer, len(ss))
for i, s := range ss {
w, _ := NewWriterDict(&deflated[i], DefaultCompression, dict)
w.Write([]byte(s))
w.Close()
}
inflated := make([]bytes.Buffer, len(ss))
f := NewReader(nil)
for i := range inflated {
f.(Resetter).Reset(&deflated[i], dict)
io.Copy(&inflated[i], f)
}
f.Close()
for i, s := range ss {
if s != inflated[i].String() {
t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s)
}
}
}
func TestReaderReusesReaderBuffer(t *testing.T) {
encodedReader := bytes.NewReader([]byte{})
encodedNotByteReader := struct{ io.Reader }{encodedReader}
t.Run("BufferIsReused", func(t *testing.T) {
f := NewReader(encodedNotByteReader).(*decompressor)
bufioR, ok := f.r.(*bufio.Reader)
if !ok {
t.Fatalf("bufio.Reader should be created")
}
f.Reset(encodedNotByteReader, nil)
if bufioR != f.r {
t.Fatalf("bufio.Reader was not reused")
}
})
t.Run("BufferIsNotReusedWhenGotByteReader", func(t *testing.T) {
f := NewReader(encodedNotByteReader).(*decompressor)
if _, ok := f.r.(*bufio.Reader); !ok {
t.Fatalf("bufio.Reader should be created")
}
f.Reset(encodedReader, nil)
if f.r != encodedReader {
t.Fatalf("provided io.ByteReader should be used directly")
}
})
t.Run("BufferIsCreatedAfterByteReader", func(t *testing.T) {
for i, r := range []io.Reader{encodedReader, bufio.NewReader(encodedReader)} {
f := NewReader(r).(*decompressor)
if f.r != r {
t.Fatalf("provided io.ByteReader should be used directly, i=%d", i)
}
f.Reset(encodedNotByteReader, nil)
if _, ok := f.r.(*bufio.Reader); !ok {
t.Fatalf("bufio.Reader should be created, i=%d", i)
}
}
})
}

View File

@@ -0,0 +1,98 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"io"
"os"
"runtime"
"strings"
"testing"
)
func TestNlitOutOfRange(t *testing.T) {
// Trying to decode this bogus flate data, which has a Huffman table
// with nlit=288, should not panic.
io.Copy(io.Discard, NewReader(strings.NewReader(
"\xfc\xfe\x36\xe7\x5e\x1c\xef\xb3\x55\x58\x77\xb6\x56\xb5\x43\xf4"+
"\x6f\xf2\xd2\xe6\x3d\x99\xa0\x85\x8c\x48\xeb\xf8\xda\x83\x04\x2a"+
"\x75\xc4\xf8\x0f\x12\x11\xb9\xb4\x4b\x09\xa0\xbe\x8b\x91\x4c")))
}
var suites = []struct{ name, file string }{
// Digits is the digits of the irrational number e. Its decimal representation
// does not repeat, but there are only 10 possible digits, so it should be
// reasonably compressible.
{"Digits", "../testdata/e.txt"},
// Newton is Isaac Newton's educational text on Opticks.
{"Newton", "../../testdata/Isaac.Newton-Opticks.txt"},
}
func BenchmarkDecode(b *testing.B) {
doBench(b, func(b *testing.B, buf0 []byte, level, n int) {
b.ReportAllocs()
b.StopTimer()
b.SetBytes(int64(n))
compressed := new(bytes.Buffer)
w, err := NewWriter(compressed, level)
if err != nil {
b.Fatal(err)
}
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
io.Copy(w, bytes.NewReader(buf0))
}
w.Close()
buf1 := compressed.Bytes()
buf0, compressed, w = nil, nil, nil
runtime.GC()
b.StartTimer()
for i := 0; i < b.N; i++ {
io.Copy(io.Discard, NewReader(bytes.NewReader(buf1)))
}
})
}
var levelTests = []struct {
name string
level int
}{
{"Huffman", HuffmanOnly},
{"Speed", BestSpeed},
{"Default", DefaultCompression},
{"Compression", BestCompression},
}
var sizes = []struct {
name string
n int
}{
{"1e4", 1e4},
{"1e5", 1e5},
{"1e6", 1e6},
}
func doBench(b *testing.B, f func(b *testing.B, buf []byte, level, n int)) {
for _, suite := range suites {
buf, err := os.ReadFile(suite.file)
if err != nil {
b.Fatal(err)
}
if len(buf) == 0 {
b.Fatalf("test file %q has no data", suite.file)
}
for _, l := range levelTests {
for _, s := range sizes {
b.Run(suite.name+"/"+l.name+"/"+s.name, func(b *testing.B) {
f(b, buf, l.level, s.n)
})
}
}
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989380952572010654858632788659361533818279682303019520353018529689957736225994138912497217752834791315155748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035637076601047101819429555961989467678374494482553797747268471040475346462080466842590694912933136770289891521047521620569660240580381501935112533824300355876402474964732639141992726042699227967823547816360093417216412199245863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818347977535663698074265425278625518184175746728909777727938000816470600161452491921732172147723501414419735685481613611573525521334757418494684385233239073941433345477624168625189835694855620992192221842725502542568876717904946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886269456042419652850222106611863067442786220391949450471237137869609563643719172874677646575739624138908658326459958133904780275900994657640789512694683983525957098258226205224894077267194782684826014769909026401363944374553050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382686838689427741559918559252459539594310499725246808459872736446958486538367362226260991246080512438843904512441365497627807977156914359977001296160894416948685558484063534220722258284886481584560285060168427394522674676788952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288797108931456691368672287489405601015033086179286809208747609178249385890097149096759852613655497818931297848216829989487226588048575640142704775551323796414515237462343645428584447952658678210511413547357395231134271661021359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435064302184531910484810053706146806749192781911979399520614196634287544406437451237181921799983910159195618146751426912397489409071864942319615679452080951465502252316038819301420937621378559566389377870830390697920773467221825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539057962685610055081066587969981635747363840525714591028970641401109712062804390397595156771577004203378699360072305587631763594218731251471205329281918261861258673215791984148488291644706095752706957220917567116722910981690915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398315019701651511685171437657618351
55650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,4 @@
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,2 @@
101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,14 @@
//Copyright2009ThGoAuthor.Allrightrrvd.
//UofthiourccodigovrndbyBSD-tyl
//licnthtcnbfoundinthLICENSEfil.
pckgmin
import"o"
funcmin(){
vrb=mk([]byt,65535)
f,_:=o.Crt("huffmn-null-mx.in")
f.Writ(b)
}
ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,13 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "os"
func main() {
var b = make([]byte, 65535)
f, _ := os.Create("huffman-null-max.in")
f.Write(b)
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,97 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
const (
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 22 bits: xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is lengthCodes[length - MIN_MATCH_LENGTH]
var lengthCodes = [...]uint32{
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 28,
}
var offsetCodes = [...]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
type token uint32
// Convert a literal into a literal token.
func literalToken(literal uint32) token { return token(literalType + literal) }
// Convert a < xlength, xoffset > pair into a match token.
func matchToken(xlength uint32, xoffset uint32) token {
return token(matchType + xlength<<lengthShift + xoffset)
}
// Returns the literal of a literal token.
func (t token) literal() uint32 { return uint32(t - literalType) }
// Returns the extra offset of a match token.
func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
func lengthCode(len uint32) uint32 { return lengthCodes[len] }
// Returns the offset code corresponding to a specific offset.
func offsetCode(off uint32) uint32 {
if off < uint32(len(offsetCodes)) {
return offsetCodes[off]
}
if off>>7 < uint32(len(offsetCodes)) {
return offsetCodes[off>>7] + 14
}
return offsetCodes[off>>14] + 28
}
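// For example, matchToken(8, 300) packs the pair as
// matchType + 8<<lengthShift + 300; length() recovers 8 and offset()
// recovers 300. offsetCode(300) takes the second branch: 300>>7 == 2, so
// the result is offsetCodes[2] + 14 == 16.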

View File

@@ -0,0 +1,237 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"fmt"
"io"
"math/rand"
"runtime"
"testing"
)
func BenchmarkEncode(b *testing.B) {
doBench(b, func(b *testing.B, buf0 []byte, level, n int) {
b.StopTimer()
b.SetBytes(int64(n))
buf1 := make([]byte, n)
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
copy(buf1[i:], buf0)
}
buf0 = nil
w, err := NewWriter(io.Discard, level)
if err != nil {
b.Fatal(err)
}
runtime.GC()
b.StartTimer()
for i := 0; i < b.N; i++ {
w.Reset(io.Discard)
w.Write(buf1)
w.Close()
}
})
}
// errorWriter is a writer that fails after N writes.
type errorWriter struct {
N int
}
func (e *errorWriter) Write(b []byte) (int, error) {
if e.N <= 0 {
return 0, io.ErrClosedPipe
}
e.N--
return len(b), nil
}
// Test that errors from the underlying writer are passed upwards.
func TestWriteError(t *testing.T) {
t.Parallel()
buf := new(bytes.Buffer)
n := 65536
if !testing.Short() {
n *= 4
}
for i := 0; i < n; i++ {
fmt.Fprintf(buf, "asdasfasf%d%dfghfgujyut%dyutyu\n", i, i, i)
}
in := buf.Bytes()
// We create our own buffer to control number of writes.
copyBuffer := make([]byte, 128)
for l := 0; l < 10; l++ {
for fail := 1; fail <= 256; fail *= 2 {
// Fail after 'fail' writes
ew := &errorWriter{N: fail}
w, err := NewWriter(ew, l)
if err != nil {
t.Fatalf("NewWriter: level %d: %v", l, err)
}
n, err := io.CopyBuffer(w, struct{ io.Reader }{bytes.NewBuffer(in)}, copyBuffer)
if err == nil {
t.Fatalf("Level %d: Expected an error, writer was %#v", l, ew)
}
n2, err := w.Write([]byte{1, 2, 2, 3, 4, 5})
if n2 != 0 {
t.Fatal("Level", l, "Expected 0 length write, got", n)
}
if err == nil {
t.Fatal("Level", l, "Expected an error")
}
err = w.Flush()
if err == nil {
t.Fatal("Level", l, "Expected an error on flush")
}
err = w.Close()
if err == nil {
t.Fatal("Level", l, "Expected an error on close")
}
w.Reset(io.Discard)
n2, err = w.Write([]byte{1, 2, 3, 4, 5, 6})
if err != nil {
t.Fatal("Level", l, "Got unexpected error after reset:", err)
}
if n2 == 0 {
t.Fatal("Level", l, "Got 0 length write, expected > 0")
}
if testing.Short() {
return
}
}
}
}
// Test if two runs produce identical results
// even when writing different sizes to the Writer.
func TestDeterministic(t *testing.T) {
t.Parallel()
for i := 0; i <= 9; i++ {
t.Run(fmt.Sprint("L", i), func(t *testing.T) { testDeterministic(i, t) })
}
t.Run("LM2", func(t *testing.T) { testDeterministic(-2, t) })
}
func testDeterministic(i int, t *testing.T) {
t.Parallel()
// Use enough data that we cross a good number of block boundaries.
var length = maxStoreBlockSize*30 + 500
if testing.Short() {
length /= 10
}
// Create a random, but compressible stream.
rng := rand.New(rand.NewSource(1))
t1 := make([]byte, length)
for i := range t1 {
t1[i] = byte(rng.Int63() & 7)
}
// Do our first encode.
var b1 bytes.Buffer
br := bytes.NewBuffer(t1)
w, err := NewWriter(&b1, i)
if err != nil {
t.Fatal(err)
}
// Use a very small prime sized buffer.
cbuf := make([]byte, 787)
_, err = io.CopyBuffer(w, struct{ io.Reader }{br}, cbuf)
if err != nil {
t.Fatal(err)
}
w.Close()
// We choose a different buffer size,
// bigger than a maximum block, and also a prime.
var b2 bytes.Buffer
cbuf = make([]byte, 81761)
br2 := bytes.NewBuffer(t1)
w2, err := NewWriter(&b2, i)
if err != nil {
t.Fatal(err)
}
_, err = io.CopyBuffer(w2, struct{ io.Reader }{br2}, cbuf)
if err != nil {
t.Fatal(err)
}
w2.Close()
b1b := b1.Bytes()
b2b := b2.Bytes()
if !bytes.Equal(b1b, b2b) {
t.Errorf("level %d did not produce deterministic result, result mismatch, len(a) = %d, len(b) = %d", i, len(b1b), len(b2b))
}
}
// TestDeflateFast_Reset will test that encoding is consistent
// across a wraparound of the table offset.
// See https://github.com/golang/go/issues/34121
func TestDeflateFast_Reset(t *testing.T) {
buf := new(bytes.Buffer)
n := 65536
for i := 0; i < n; i++ {
fmt.Fprintf(buf, "asdfasdfasdfasdf%d%dfghfgujyut%dyutyu\n", i, i, i)
}
// This is specific to level 1.
const level = 1
in := buf.Bytes()
offset := 1
if testing.Short() {
offset = 256
}
// We do an encode with a clean buffer to compare.
var want bytes.Buffer
w, err := NewWriter(&want, level)
if err != nil {
t.Fatalf("NewWriter: level %d: %v", level, err)
}
// Output written 3 times.
w.Write(in)
w.Write(in)
w.Write(in)
w.Close()
for ; offset <= 256; offset *= 2 {
w, err := NewWriter(io.Discard, level)
if err != nil {
t.Fatalf("NewWriter: level %d: %v", level, err)
}
// Reset until we are right before the wraparound.
// Each reset adds maxMatchOffset to the offset.
for i := 0; i < (bufferReset-len(in)-offset-maxMatchOffset)/maxMatchOffset; i++ {
// skip ahead to where we are close to wrap around...
w.d.reset(nil)
}
var got bytes.Buffer
w.Reset(&got)
// Write 3 times, close.
for i := 0; i < 3; i++ {
_, err = w.Write(in)
if err != nil {
t.Fatal(err)
}
}
err = w.Close()
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(got.Bytes(), want.Bytes()) {
t.Fatalf("output did not match at wraparound, len(want) = %d, len(got) = %d", want.Len(), got.Len())
}
}
}

View File

@@ -0,0 +1,218 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip_test
import (
"bytes"
"compress/gzip"
"fmt"
"io"
"log"
"net/http"
"net/http/httptest"
"os"
"strings"
"time"
)
func Example_writerReader() {
var buf bytes.Buffer
zw := gzip.NewWriter(&buf)
// Setting the Header fields is optional.
zw.Name = "a-new-hope.txt"
zw.Comment = "an epic space opera by George Lucas"
zw.ModTime = time.Date(1977, time.May, 25, 0, 0, 0, 0, time.UTC)
_, err := zw.Write([]byte("A long time ago in a galaxy far, far away..."))
if err != nil {
log.Fatal(err)
}
if err := zw.Close(); err != nil {
log.Fatal(err)
}
zr, err := gzip.NewReader(&buf)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Name: %s\nComment: %s\nModTime: %s\n\n", zr.Name, zr.Comment, zr.ModTime.UTC())
if _, err := io.Copy(os.Stdout, zr); err != nil {
log.Fatal(err)
}
if err := zr.Close(); err != nil {
log.Fatal(err)
}
// Output:
// Name: a-new-hope.txt
// Comment: an epic space opera by George Lucas
// ModTime: 1977-05-25 00:00:00 +0000 UTC
//
// A long time ago in a galaxy far, far away...
}
func ExampleReader_Multistream() {
var buf bytes.Buffer
zw := gzip.NewWriter(&buf)
var files = []struct {
name string
comment string
modTime time.Time
data string
}{
{"file-1.txt", "file-header-1", time.Date(2006, time.February, 1, 3, 4, 5, 0, time.UTC), "Hello Gophers - 1"},
{"file-2.txt", "file-header-2", time.Date(2007, time.March, 2, 4, 5, 6, 1, time.UTC), "Hello Gophers - 2"},
}
for _, file := range files {
zw.Name = file.name
zw.Comment = file.comment
zw.ModTime = file.modTime
if _, err := zw.Write([]byte(file.data)); err != nil {
log.Fatal(err)
}
if err := zw.Close(); err != nil {
log.Fatal(err)
}
zw.Reset(&buf)
}
zr, err := gzip.NewReader(&buf)
if err != nil {
log.Fatal(err)
}
for {
zr.Multistream(false)
fmt.Printf("Name: %s\nComment: %s\nModTime: %s\n\n", zr.Name, zr.Comment, zr.ModTime.UTC())
if _, err := io.Copy(os.Stdout, zr); err != nil {
log.Fatal(err)
}
fmt.Print("\n\n")
err = zr.Reset(&buf)
if err == io.EOF {
break
}
if err != nil {
log.Fatal(err)
}
}
if err := zr.Close(); err != nil {
log.Fatal(err)
}
// Output:
// Name: file-1.txt
// Comment: file-header-1
// ModTime: 2006-02-01 03:04:05 +0000 UTC
//
// Hello Gophers - 1
//
// Name: file-2.txt
// Comment: file-header-2
// ModTime: 2007-03-02 04:05:06 +0000 UTC
//
// Hello Gophers - 2
}
func Example_compressingReader() {
// This is an example of writing a compressing reader.
// This can be useful for an HTTP client body, as shown.
const testdata = "the data to be compressed"
// This HTTP handler is just for testing purposes.
handler := http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
zr, err := gzip.NewReader(req.Body)
if err != nil {
log.Fatal(err)
}
// Just output the data for the example.
if _, err := io.Copy(os.Stdout, zr); err != nil {
log.Fatal(err)
}
})
ts := httptest.NewServer(handler)
defer ts.Close()
// The remainder is the example code.
// The data we want to compress, as an io.Reader
dataReader := strings.NewReader(testdata)
// bodyReader is the body of the HTTP request, as an io.Reader.
// httpWriter is the body of the HTTP request, as an io.Writer.
bodyReader, httpWriter := io.Pipe()
// Make sure that bodyReader is always closed, so that the
// goroutine below will always exit.
defer bodyReader.Close()
// gzipWriter compresses data to httpWriter.
gzipWriter := gzip.NewWriter(httpWriter)
// errch collects any errors from the writing goroutine.
errch := make(chan error, 1)
go func() {
defer close(errch)
sentErr := false
sendErr := func(err error) {
if !sentErr {
errch <- err
sentErr = true
}
}
// Copy our data to gzipWriter, which compresses it to
// httpWriter, which feeds it to bodyReader.
if _, err := io.Copy(gzipWriter, dataReader); err != nil && err != io.ErrClosedPipe {
sendErr(err)
}
if err := gzipWriter.Close(); err != nil && err != io.ErrClosedPipe {
sendErr(err)
}
if err := httpWriter.Close(); err != nil && err != io.ErrClosedPipe {
sendErr(err)
}
}()
// Send an HTTP request to the test server.
req, err := http.NewRequest("PUT", ts.URL, bodyReader)
if err != nil {
log.Fatal(err)
}
// Note that passing req to http.Client.Do promises that it
// will close the body, in this case bodyReader.
resp, err := ts.Client().Do(req)
if err != nil {
log.Fatal(err)
}
// Check whether there was an error compressing the data.
if err := <-errch; err != nil {
log.Fatal(err)
}
// For this example we don't care about the response.
resp.Body.Close()
// Output: the data to be compressed
}

View File

@@ -0,0 +1,92 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"bytes"
"encoding/base64"
"io"
"os"
"path/filepath"
"strings"
"testing"
)
func FuzzReader(f *testing.F) {
inp := []byte("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.")
for _, level := range []int{BestSpeed, BestCompression, DefaultCompression, HuffmanOnly} {
b := bytes.NewBuffer(nil)
w, err := NewWriterLevel(b, level)
if err != nil {
f.Fatalf("failed to construct writer: %s", err)
}
_, err = w.Write(inp)
if err != nil {
f.Fatalf("failed to write: %s", err)
}
f.Add(b.Bytes())
}
testdata, err := os.ReadDir("testdata")
if err != nil {
f.Fatalf("failed to read testdata directory: %s", err)
}
for _, de := range testdata {
if de.IsDir() {
continue
}
b, err := os.ReadFile(filepath.Join("testdata", de.Name()))
if err != nil {
f.Fatalf("failed to read testdata: %s", err)
}
// Decode any base64-encoded test files (names ending in ".base64").
if strings.HasSuffix(de.Name(), ".base64") {
b, err = base64.StdEncoding.DecodeString(string(b))
if err != nil {
f.Fatalf("failed to decode base64 testdata: %s", err)
}
}
f.Add(b)
}
f.Fuzz(func(t *testing.T, b []byte) {
for _, multistream := range []bool{true, false} {
r, err := NewReader(bytes.NewBuffer(b))
if err != nil {
continue
}
r.Multistream(multistream)
decompressed := bytes.NewBuffer(nil)
if _, err := io.Copy(decompressed, r); err != nil {
continue
}
if err := r.Close(); err != nil {
continue
}
for _, level := range []int{NoCompression, BestSpeed, BestCompression, DefaultCompression, HuffmanOnly} {
w, err := NewWriterLevel(io.Discard, level)
if err != nil {
t.Fatalf("failed to construct writer: %s", err)
}
_, err = w.Write(decompressed.Bytes())
if err != nil {
t.Fatalf("failed to write: %s", err)
}
if err := w.Flush(); err != nil {
t.Fatalf("failed to flush: %s", err)
}
if err := w.Close(); err != nil {
t.Fatalf("failed to close: %s", err)
}
}
}
})
}

290
src/compress/gzip/gunzip.go Normal file
View File

@@ -0,0 +1,290 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip
import (
"bufio"
"compress/flate"
"encoding/binary"
"errors"
"hash/crc32"
"io"
"time"
)
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
gzipDeflate = 8
flagText = 1 << 0
flagHdrCrc = 1 << 1
flagExtra = 1 << 2
flagName = 1 << 3
flagComment = 1 << 4
)
var (
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
ErrChecksum = errors.New("gzip: invalid checksum")
// ErrHeader is returned when reading GZIP data that has an invalid header.
ErrHeader = errors.New("gzip: invalid header")
)
var le = binary.LittleEndian
// noEOF converts io.EOF to io.ErrUnexpectedEOF.
func noEOF(err error) error {
if err == io.EOF {
return io.ErrUnexpectedEOF
}
return err
}
// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the [Writer] and [Reader] structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
Comment string // comment
Extra []byte // "extra data"
ModTime time.Time // modification time
Name string // file name
OS byte // operating system type
}
// A Reader is an [io.Reader] that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return an [ErrChecksum] when [Reader.Read]
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by [Reader.Read] as tentative until they receive the [io.EOF]
// marking the end of the data.
type Reader struct {
Header // valid after NewReader or Reader.Reset
r flate.Reader
decompressor io.ReadCloser
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
buf [512]byte
err error
multistream bool
}
// NewReader creates a new [Reader] reading the given reader.
// If r does not also implement [io.ByteReader],
// the decompressor may read more data than necessary from r.
//
// It is the caller's responsibility to call Close on the [Reader] when done.
//
// The [Reader.Header] fields will be valid in the [Reader] returned.
func NewReader(r io.Reader) (*Reader, error) {
z := new(Reader)
if err := z.Reset(r); err != nil {
return nil, err
}
return z, nil
}
// Reset discards the [Reader] z's state and makes it equivalent to the
// result of its original state from [NewReader], but reading from r instead.
// This permits reusing a [Reader] rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr
} else {
z.r = bufio.NewReader(r)
}
z.Header, z.err = z.readHeader()
return z.err
}
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the [Reader] expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the [Reader] reaches the end of the data stream,
// [Reader.Read] returns [io.EOF]. The underlying reader must implement [io.ByteReader]
// in order to be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
// If there is no next stream, z.Reset(r) will return [io.EOF].
func (z *Reader) Multistream(ok bool) {
z.multistream = ok
}
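// Illustrative usage sketch (editor's addition, not part of the original
// file): reading concatenated gzip members one at a time with
// Multistream(false). A minimal example_test.go-style function, assuming
// the standard bytes, io, log, and os packages are imported; the variable
// names are hypothetical. Note bytes.Buffer implements io.ByteReader, so
// the reader is left positioned just after each member.
//
//	func ExampleReader_Multistream() {
//		var buf bytes.Buffer
//		for _, s := range []string{"first\n", "second\n"} {
//			zw := gzip.NewWriter(&buf)
//			zw.Write([]byte(s))
//			zw.Close()
//		}
//		zr, err := gzip.NewReader(&buf)
//		if err != nil {
//			log.Fatal(err)
//		}
//		for {
//			zr.Multistream(false)
//			if _, err := io.Copy(os.Stdout, zr); err != nil {
//				log.Fatal(err)
//			}
//			err = zr.Reset(&buf)
//			if err == io.EOF {
//				break // no further stream
//			}
//			if err != nil {
//				log.Fatal(err)
//			}
//		}
//		// Output:
//		// first
//		// second
//	}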
// readString reads a NUL-terminated string from z.r.
// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
// will output a string encoded using UTF-8.
// This method always updates z.digest with the data read.
func (z *Reader) readString() (string, error) {
var err error
needConv := false
for i := 0; ; i++ {
if i >= len(z.buf) {
return "", ErrHeader
}
z.buf[i], err = z.r.ReadByte()
if err != nil {
return "", err
}
if z.buf[i] > 0x7f {
needConv = true
}
if z.buf[i] == 0 {
// Digest covers the NUL terminator.
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
if needConv {
s := make([]rune, 0, i)
for _, v := range z.buf[:i] {
s = append(s, rune(v))
}
return string(s), nil
}
return string(z.buf[:i]), nil
}
}
}
// readHeader reads the GZIP header according to section 2.3.1.
// This method does not set z.err.
func (z *Reader) readHeader() (hdr Header, err error) {
if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
// RFC 1952, section 2.2, says the following:
// A gzip file consists of a series of "members" (compressed data sets).
//
// Other than this, the specification does not clarify whether a
// "series" is defined as "one or more" or "zero or more". To err on the
// side of caution, Go interprets this to mean "zero or more".
// Thus, it is okay to return io.EOF here.
return hdr, err
}
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
return hdr, ErrHeader
}
flg := z.buf[3]
if t := int64(le.Uint32(z.buf[4:8])); t > 0 {
// Section 2.3.1, the zero value for MTIME means that the
// modified time is not set.
hdr.ModTime = time.Unix(t, 0)
}
// z.buf[8] is XFL and is currently ignored.
hdr.OS = z.buf[9]
z.digest = crc32.ChecksumIEEE(z.buf[:10])
if flg&flagExtra != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
data := make([]byte, le.Uint16(z.buf[:2]))
if _, err = io.ReadFull(z.r, data); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
hdr.Extra = data
}
var s string
if flg&flagName != 0 {
if s, err = z.readString(); err != nil {
return hdr, noEOF(err)
}
hdr.Name = s
}
if flg&flagComment != 0 {
if s, err = z.readString(); err != nil {
return hdr, noEOF(err)
}
hdr.Comment = s
}
if flg&flagHdrCrc != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
digest := le.Uint16(z.buf[:2])
if digest != uint16(z.digest) {
return hdr, ErrHeader
}
}
z.digest = 0
if z.decompressor == nil {
z.decompressor = flate.NewReader(z.r)
} else {
z.decompressor.(flate.Resetter).Reset(z.r, nil)
}
return hdr, nil
}
// Read implements [io.Reader], reading uncompressed bytes from its underlying [Reader].
func (z *Reader) Read(p []byte) (n int, err error) {
if z.err != nil {
return 0, z.err
}
for n == 0 {
n, z.err = z.decompressor.Read(p)
z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
z.size += uint32(n)
if z.err != io.EOF {
// In the normal case we return here.
return n, z.err
}
// Finished file; check checksum and size.
if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
z.err = noEOF(err)
return n, z.err
}
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return n, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return n, io.EOF
}
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
return n, z.err
}
}
return n, nil
}
// Close closes the [Reader]. It does not close the underlying [io.Reader].
// In order for the GZIP checksum to be verified, the reader must be
// fully consumed until the [io.EOF].
func (z *Reader) Close() error { return z.decompressor.Close() }
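// Illustrative usage sketch (editor's addition, not part of the original
// file): a minimal decompression helper, assuming the standard bytes, io,
// and os packages; the function name is hypothetical.
//
//	func gunzipToStdout(compressed []byte) error {
//		zr, err := gzip.NewReader(bytes.NewReader(compressed))
//		if err != nil {
//			return err
//		}
//		defer zr.Close()
//		// Copy until io.EOF so the trailer's checksum and size are verified.
//		_, err = io.Copy(os.Stdout, zr)
//		return err
//	}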


@@ -0,0 +1,587 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"bytes"
"compress/flate"
"encoding/base64"
"io"
"os"
"strings"
"testing"
"time"
)
type gunzipTest struct {
name string
desc string
raw string
gzip []byte
err error
}
var gunzipTests = []gunzipTest{
{ // has 1 empty fixed-huffman block
"empty.txt",
"empty.txt",
"",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xf7, 0x5e, 0x14, 0x4a,
0x00, 0x03, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2e,
0x74, 0x78, 0x74, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
nil,
},
{
"",
"empty - with no file name",
"",
[]byte{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88,
0x00, 0xff, 0x01, 0x00, 0x00, 0xff, 0xff, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
nil,
},
{ // has 1 non-empty fixed huffman block
"hello.txt",
"hello.txt",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00,
},
nil,
},
{ // concatenation
"hello.txt",
"hello.txt x2",
"hello world\n" +
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00,
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00,
},
nil,
},
{ // has a fixed huffman block with some length-distance pairs
"shesells.txt",
"shesells.txt",
"she sells seashells by the seashore\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0x72, 0x66, 0x8b, 0x4a,
0x00, 0x03, 0x73, 0x68, 0x65, 0x73, 0x65, 0x6c,
0x6c, 0x73, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x2b,
0xce, 0x48, 0x55, 0x28, 0x4e, 0xcd, 0xc9, 0x29,
0x06, 0x92, 0x89, 0xc5, 0x19, 0x60, 0x56, 0x52,
0xa5, 0x42, 0x09, 0x58, 0x18, 0x28, 0x90, 0x5f,
0x94, 0xca, 0x05, 0x00, 0x76, 0xb0, 0x3b, 0xeb,
0x24, 0x00, 0x00, 0x00,
},
nil,
},
{ // has dynamic huffman blocks
"gettysburg",
"gettysburg",
" Four score and seven years ago our fathers brought forth on\n" +
"this continent, a new nation, conceived in Liberty, and dedicated\n" +
"to the proposition that all men are created equal.\n" +
" Now we are engaged in a great Civil War, testing whether that\n" +
"nation, or any nation so conceived and so dedicated, can long\n" +
"endure.\n" +
" We are met on a great battle-field of that war.\n" +
" We have come to dedicate a portion of that field, as a final\n" +
"resting place for those who here gave their lives that that\n" +
"nation might live. It is altogether fitting and proper that\n" +
"we should do this.\n" +
" But, in a larger sense, we can not dedicate — we can not\n" +
"consecrate — we can not hallow — this ground.\n" +
" The brave men, living and dead, who struggled here, have\n" +
"consecrated it, far above our poor power to add or detract.\n" +
"The world will little note, nor long remember what we say here,\n" +
"but it can never forget what they did here.\n" +
" It is for us the living, rather, to be dedicated here to the\n" +
"unfinished work which they who fought here have thus far so\n" +
"nobly advanced. It is rather for us to be here dedicated to\n" +
"the great task remaining before us — that from these honored\n" +
"dead we take increased devotion to that cause for which they\n" +
"gave the last full measure of devotion —\n" +
" that we here highly resolve that these dead shall not have\n" +
"died in vain — that this nation, under God, shall have a new\n" +
"birth of freedom — and that government of the people, by the\n" +
"people, for the people, shall not perish from this earth.\n" +
"\n" +
"Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x12, 0x2b, 0x4a,
0x00, 0x03, 0x67, 0x65, 0x74, 0x74, 0x79, 0x73,
0x62, 0x75, 0x72, 0x67, 0x00, 0x65, 0x54, 0xcd,
0x6e, 0xd4, 0x30, 0x10, 0xbe, 0xfb, 0x29, 0xe6,
0x01, 0x42, 0xa5, 0x0a, 0x09, 0xc1, 0x11, 0x90,
0x40, 0x48, 0xa8, 0xe2, 0x80, 0xd4, 0xf3, 0x24,
0x9e, 0x24, 0x56, 0xbd, 0x9e, 0xc5, 0x76, 0x76,
0x95, 0x1b, 0x0f, 0xc1, 0x13, 0xf2, 0x24, 0x7c,
0x63, 0x77, 0x9b, 0x4a, 0x5c, 0xaa, 0x6e, 0x6c,
0xcf, 0x7c, 0x7f, 0x33, 0x44, 0x5f, 0x74, 0xcb,
0x54, 0x26, 0xcd, 0x42, 0x9c, 0x3c, 0x15, 0xb9,
0x48, 0xa2, 0x5d, 0x38, 0x17, 0xe2, 0x45, 0xc9,
0x4e, 0x67, 0xae, 0xab, 0xe0, 0xf7, 0x98, 0x75,
0x5b, 0xd6, 0x4a, 0xb3, 0xe6, 0xba, 0x92, 0x26,
0x57, 0xd7, 0x50, 0x68, 0xd2, 0x54, 0x43, 0x92,
0x54, 0x07, 0x62, 0x4a, 0x72, 0xa5, 0xc4, 0x35,
0x68, 0x1a, 0xec, 0x60, 0x92, 0x70, 0x11, 0x4f,
0x21, 0xd1, 0xf7, 0x30, 0x4a, 0xae, 0xfb, 0xd0,
0x9a, 0x78, 0xf1, 0x61, 0xe2, 0x2a, 0xde, 0x55,
0x25, 0xd4, 0xa6, 0x73, 0xd6, 0xb3, 0x96, 0x60,
0xef, 0xf0, 0x9b, 0x2b, 0x71, 0x8c, 0x74, 0x02,
0x10, 0x06, 0xac, 0x29, 0x8b, 0xdd, 0x25, 0xf9,
0xb5, 0x71, 0xbc, 0x73, 0x44, 0x0f, 0x7a, 0xa5,
0xab, 0xb4, 0x33, 0x49, 0x0b, 0x2f, 0xbd, 0x03,
0xd3, 0x62, 0x17, 0xe9, 0x73, 0xb8, 0x84, 0x48,
0x8f, 0x9c, 0x07, 0xaa, 0x52, 0x00, 0x6d, 0xa1,
0xeb, 0x2a, 0xc6, 0xa0, 0x95, 0x76, 0x37, 0x78,
0x9a, 0x81, 0x65, 0x7f, 0x46, 0x4b, 0x45, 0x5f,
0xe1, 0x6d, 0x42, 0xe8, 0x01, 0x13, 0x5c, 0x38,
0x51, 0xd4, 0xb4, 0x38, 0x49, 0x7e, 0xcb, 0x62,
0x28, 0x1e, 0x3b, 0x82, 0x93, 0x54, 0x48, 0xf1,
0xd2, 0x7d, 0xe4, 0x5a, 0xa3, 0xbc, 0x99, 0x83,
0x44, 0x4f, 0x3a, 0x77, 0x36, 0x57, 0xce, 0xcf,
0x2f, 0x56, 0xbe, 0x80, 0x90, 0x9e, 0x84, 0xea,
0x51, 0x1f, 0x8f, 0xcf, 0x90, 0xd4, 0x60, 0xdc,
0x5e, 0xb4, 0xf7, 0x10, 0x0b, 0x26, 0xe0, 0xff,
0xc4, 0xd1, 0xe5, 0x67, 0x2e, 0xe7, 0xc8, 0x93,
0x98, 0x05, 0xb8, 0xa8, 0x45, 0xc0, 0x4d, 0x09,
0xdc, 0x84, 0x16, 0x2b, 0x0d, 0x9a, 0x21, 0x53,
0x04, 0x8b, 0xd2, 0x0b, 0xbd, 0xa2, 0x4c, 0xa7,
0x60, 0xee, 0xd9, 0xe1, 0x1d, 0xd1, 0xb7, 0x4a,
0x30, 0x8f, 0x63, 0xd5, 0xa5, 0x8b, 0x33, 0x87,
0xda, 0x1a, 0x18, 0x79, 0xf3, 0xe3, 0xa6, 0x17,
0x94, 0x2e, 0xab, 0x6e, 0xa0, 0xe3, 0xcd, 0xac,
0x50, 0x8c, 0xca, 0xa7, 0x0d, 0x76, 0x37, 0xd1,
0x23, 0xe7, 0x05, 0x57, 0x8b, 0xa4, 0x22, 0x83,
0xd9, 0x62, 0x52, 0x25, 0xad, 0x07, 0xbb, 0xbf,
0xbf, 0xff, 0xbc, 0xfa, 0xee, 0x20, 0x73, 0x91,
0x29, 0xff, 0x7f, 0x02, 0x71, 0x62, 0x84, 0xb5,
0xf6, 0xb5, 0x25, 0x6b, 0x41, 0xde, 0x92, 0xb7,
0x76, 0x3f, 0x91, 0x91, 0x31, 0x1b, 0x41, 0x84,
0x62, 0x30, 0x0a, 0x37, 0xa4, 0x5e, 0x18, 0x3a,
0x99, 0x08, 0xa5, 0xe6, 0x6d, 0x59, 0x22, 0xec,
0x33, 0x39, 0x86, 0x26, 0xf5, 0xab, 0x66, 0xc8,
0x08, 0x20, 0xcf, 0x0c, 0xd7, 0x47, 0x45, 0x21,
0x0b, 0xf6, 0x59, 0xd5, 0xfe, 0x5c, 0x8d, 0xaa,
0x12, 0x7b, 0x6f, 0xa1, 0xf0, 0x52, 0x33, 0x4f,
0xf5, 0xce, 0x59, 0xd3, 0xab, 0x66, 0x10, 0xbf,
0x06, 0xc4, 0x31, 0x06, 0x73, 0xd6, 0x80, 0xa2,
0x78, 0xc2, 0x45, 0xcb, 0x03, 0x65, 0x39, 0xc9,
0x09, 0xd1, 0x06, 0x04, 0x33, 0x1a, 0x5a, 0xf1,
0xde, 0x01, 0xb8, 0x71, 0x83, 0xc4, 0xb5, 0xb3,
0xc3, 0x54, 0x65, 0x33, 0x0d, 0x5a, 0xf7, 0x9b,
0x90, 0x7c, 0x27, 0x1f, 0x3a, 0x58, 0xa3, 0xd8,
0xfd, 0x30, 0x5f, 0xb7, 0xd2, 0x66, 0xa2, 0x93,
0x1c, 0x28, 0xb7, 0xe9, 0x1b, 0x0c, 0xe1, 0x28,
0x47, 0x26, 0xbb, 0xe9, 0x7d, 0x7e, 0xdc, 0x96,
0x10, 0x92, 0x50, 0x56, 0x7c, 0x06, 0xe2, 0x27,
0xb4, 0x08, 0xd3, 0xda, 0x7b, 0x98, 0x34, 0x73,
0x9f, 0xdb, 0xf6, 0x62, 0xed, 0x31, 0x41, 0x13,
0xd3, 0xa2, 0xa8, 0x4b, 0x3a, 0xc6, 0x1d, 0xe4,
0x2f, 0x8c, 0xf8, 0xfb, 0x97, 0x64, 0xf4, 0xb6,
0x2f, 0x80, 0x5a, 0xf3, 0x56, 0xe0, 0x40, 0x50,
0xd5, 0x19, 0xd0, 0x1e, 0xfc, 0xca, 0xe5, 0xc9,
0xd4, 0x60, 0x00, 0x81, 0x2e, 0xa3, 0xcc, 0xb6,
0x52, 0xf0, 0xb4, 0xdb, 0x69, 0x99, 0xce, 0x7a,
0x32, 0x4c, 0x08, 0xed, 0xaa, 0x10, 0x10, 0xe3,
0x6f, 0xee, 0x99, 0x68, 0x95, 0x9f, 0x04, 0x71,
0xb2, 0x49, 0x2f, 0x62, 0xa6, 0x5e, 0xb4, 0xef,
0x02, 0xed, 0x4f, 0x27, 0xde, 0x4a, 0x0f, 0xfd,
0xc1, 0xcc, 0xdd, 0x02, 0x8f, 0x08, 0x16, 0x54,
0xdf, 0xda, 0xca, 0xe0, 0x82, 0xf1, 0xb4, 0x31,
0x7a, 0xa9, 0x81, 0xfe, 0x90, 0xb7, 0x3e, 0xdb,
0xd3, 0x35, 0xc0, 0x20, 0x80, 0x33, 0x46, 0x4a,
0x63, 0xab, 0xd1, 0x0d, 0x29, 0xd2, 0xe2, 0x84,
0xb8, 0xdb, 0xfa, 0xe9, 0x89, 0x44, 0x86, 0x7c,
0xe8, 0x0b, 0xe6, 0x02, 0x6a, 0x07, 0x9b, 0x96,
0xd0, 0xdb, 0x2e, 0x41, 0x4c, 0xa1, 0xd5, 0x57,
0x45, 0x14, 0xfb, 0xe3, 0xa6, 0x72, 0x5b, 0x87,
0x6e, 0x0c, 0x6d, 0x5b, 0xce, 0xe0, 0x2f, 0xe2,
0x21, 0x81, 0x95, 0xb0, 0xe8, 0xb6, 0x32, 0x0b,
0xb2, 0x98, 0x13, 0x52, 0x5d, 0xfb, 0xec, 0x63,
0x17, 0x8a, 0x9e, 0x23, 0x22, 0x36, 0xee, 0xcd,
0xda, 0xdb, 0xcf, 0x3e, 0xf1, 0xc7, 0xf1, 0x01,
0x12, 0x93, 0x0a, 0xeb, 0x6f, 0xf2, 0x02, 0x15,
0x96, 0x77, 0x5d, 0xef, 0x9c, 0xfb, 0x88, 0x91,
0x59, 0xf9, 0x84, 0xdd, 0x9b, 0x26, 0x8d, 0x80,
0xf9, 0x80, 0x66, 0x2d, 0xac, 0xf7, 0x1f, 0x06,
0xba, 0x7f, 0xff, 0xee, 0xed, 0x40, 0x5f, 0xa5,
0xd6, 0xbd, 0x8c, 0x5b, 0x46, 0xd2, 0x7e, 0x48,
0x4a, 0x65, 0x8f, 0x08, 0x42, 0x60, 0xf7, 0x0f,
0xb9, 0x16, 0x0b, 0x0c, 0x1a, 0x06, 0x00, 0x00,
},
nil,
},
{ // has 1 non-empty fixed huffman block then garbage
"hello.txt",
"hello.txt + garbage",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!',
},
ErrHeader,
},
{ // has 1 non-empty fixed huffman block not enough header
"hello.txt",
"hello.txt + garbage",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00, gzipID1,
},
io.ErrUnexpectedEOF,
},
{ // has 1 non-empty fixed huffman block but corrupt checksum
"hello.txt",
"hello.txt + corrupt checksum",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00,
0x00, 0x00,
},
ErrChecksum,
},
{ // has 1 non-empty fixed huffman block but corrupt size
"hello.txt",
"hello.txt + corrupt size",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00,
0x00, 0x00,
},
ErrChecksum,
},
{
"f1l3n4m3.tXt",
"header with all fields used",
"",
[]byte{
0x1f, 0x8b, 0x08, 0x1e, 0x70, 0xf0, 0xf9, 0x4a,
0x00, 0xaa, 0x09, 0x00, 0x7a, 0x7a, 0x05, 0x00,
0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x31, 0x6c,
0x33, 0x6e, 0x34, 0x6d, 0x33, 0x2e, 0x74, 0x58,
0x74, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,
0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26,
0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e,
0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46,
0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e,
0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86,
0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e,
0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe,
0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,
0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
0xff, 0x00, 0x92, 0xfd, 0x01, 0x00, 0x00, 0xff,
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00,
},
nil,
},
{
"",
"truncated gzip file amid raw-block",
"hello",
[]byte{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
0x00, 0x0c, 0x00, 0xf3, 0xff, 0x68, 0x65, 0x6c, 0x6c, 0x6f,
},
io.ErrUnexpectedEOF,
},
{
"",
"truncated gzip file amid fixed-block",
"He",
[]byte{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
0xf2, 0x48, 0xcd,
},
io.ErrUnexpectedEOF,
},
{
"hello.txt",
"gzip header with truncated name",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00,
0x1f, 0x8b, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00,
0x00, 0xff, 0x01,
},
io.ErrUnexpectedEOF,
},
{
"",
"gzip header with truncated comment",
"hello world\n",
[]byte{
0x1f, 0x8b, 0x08, 0x10, 0xc8, 0x58, 0x13, 0x4a,
0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00,
0x1f, 0x8b, 0x08, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0xff, 0x01,
},
io.ErrUnexpectedEOF,
},
}
func TestDecompressor(t *testing.T) {
// Keep resetting this reader.
// It is intended behavior that Reader.Reset can be called on a zero-value
// Reader and behave as if NewReader had been used instead.
r1 := new(Reader)
b := new(bytes.Buffer)
for _, tt := range gunzipTests {
// Test NewReader.
in := bytes.NewReader(tt.gzip)
r2, err := NewReader(in)
if err != nil {
t.Errorf("%s: NewReader: %s", tt.desc, err)
continue
}
defer r2.Close()
if tt.name != r2.Name {
t.Errorf("%s: got name %s", tt.desc, r2.Name)
}
b.Reset()
n, err := io.Copy(b, r2)
if err != tt.err {
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
}
s := b.String()
if s != tt.raw {
t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
}
// Test Reader.Reset.
in = bytes.NewReader(tt.gzip)
err = r1.Reset(in)
if err != nil {
t.Errorf("%s: Reset: %s", tt.desc, err)
continue
}
if tt.name != r1.Name {
t.Errorf("%s: got name %s", tt.desc, r1.Name)
}
b.Reset()
n, err = io.Copy(b, r1)
if err != tt.err {
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
}
s = b.String()
if s != tt.raw {
t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
}
}
}
func TestIssue6550(t *testing.T) {
// Apple's notarization service will recursively attempt to decompress
// files in order to find binaries to notarize. Since the service is
// unable to decompress this file, it may reject the entire toolchain. Use a
// base64-encoded version to avoid this.
// See golang.org/issue/34986
f, err := os.Open("testdata/issue6550.gz.base64")
if err != nil {
t.Fatal(err)
}
gzip, err := NewReader(base64.NewDecoder(base64.StdEncoding, f))
if err != nil {
t.Fatalf("NewReader(testdata/issue6550.gz): %v", err)
}
defer gzip.Close()
done := make(chan bool, 1)
go func() {
_, err := io.Copy(io.Discard, gzip)
if err == nil {
t.Errorf("Copy succeeded")
} else {
t.Logf("Copy failed (correctly): %v", err)
}
done <- true
}()
select {
case <-time.After(1 * time.Second):
t.Errorf("Copy hung")
case <-done:
// ok
}
}
func TestMultistreamFalse(t *testing.T) {
// Find concatenation test.
var tt gunzipTest
for _, tt = range gunzipTests {
if strings.HasSuffix(tt.desc, " x2") {
goto Found
}
}
t.Fatal("cannot find hello.txt x2 in gunzip tests")
Found:
br := bytes.NewReader(tt.gzip)
var r Reader
if err := r.Reset(br); err != nil {
t.Fatalf("first reset: %v", err)
}
// Expect two streams with "hello world\n", then real EOF.
const hello = "hello world\n"
r.Multistream(false)
data, err := io.ReadAll(&r)
if string(data) != hello || err != nil {
t.Fatalf("first stream = %q, %v, want %q, %v", string(data), err, hello, nil)
}
if err := r.Reset(br); err != nil {
t.Fatalf("second reset: %v", err)
}
r.Multistream(false)
data, err = io.ReadAll(&r)
if string(data) != hello || err != nil {
t.Fatalf("second stream = %q, %v, want %q, %v", string(data), err, hello, nil)
}
if err := r.Reset(br); err != io.EOF {
t.Fatalf("third reset: err=%v, want io.EOF", err)
}
}
func TestNilStream(t *testing.T) {
// Go liberally interprets RFC 1952 section 2.2 to mean that a gzip file
// consists of zero or more members. Thus, we test that a nil stream is okay.
_, err := NewReader(bytes.NewReader(nil))
if err != io.EOF {
t.Fatalf("NewReader(nil) on empty stream: got %v, want io.EOF", err)
}
}
func TestTruncatedStreams(t *testing.T) {
cases := []struct {
name string
data []byte
}{
{
name: "original",
data: []byte("\x1f\x8b\b\x04\x00\tn\x88\x00\xff\a\x00foo bar\xcbH\xcd\xc9\xc9\xd7Q(\xcf/\xcaI\x01\x04:r\xab\xff\f\x00\x00\x00"),
},
{
name: "truncated name",
data: []byte{
0x1f, 0x8b, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01,
},
},
{
name: "truncated comment",
data: []byte{
0x1f, 0x8b, 0x08, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01,
},
},
}
// Intentionally iterate starting with at least one byte in the stream.
for _, tc := range cases {
for i := 1; i < len(tc.data); i++ {
r, err := NewReader(strings.NewReader(string(tc.data[:i])))
if err != nil {
if err != io.ErrUnexpectedEOF {
t.Errorf("NewReader(%s-%d) on truncated stream: got %v, want %v", tc.name, i, err, io.ErrUnexpectedEOF)
}
continue
}
_, err = io.Copy(io.Discard, r)
if ferr, ok := err.(*flate.ReadError); ok {
err = ferr.Err
}
if err != io.ErrUnexpectedEOF {
t.Errorf("io.Copy(%s-%d) on truncated stream: got %v, want %v", tc.name, i, err, io.ErrUnexpectedEOF)
}
}
}
}
func TestCVE202230631(t *testing.T) {
var empty = []byte{0x1f, 0x8b, 0x08, 0x00, 0xa7, 0x8f, 0x43, 0x62, 0x00,
0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
r := bytes.NewReader(bytes.Repeat(empty, 4e6))
z, err := NewReader(r)
if err != nil {
t.Fatalf("NewReader: got %v, want nil", err)
}
// Prior to CVE-2022-30631 fix, this would cause an unrecoverable panic due
// to stack exhaustion.
_, err = z.Read(make([]byte, 10))
if err != io.EOF {
t.Errorf("Reader.Read: got %v, want %v", err, io.EOF)
}
}

src/compress/gzip/gzip.go Normal file

@@ -0,0 +1,250 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"compress/flate"
"errors"
"fmt"
"hash/crc32"
"io"
"time"
)
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
HuffmanOnly = flate.HuffmanOnly
)
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
Header // written at first call to Write, Flush, or Close
w io.Writer
level int
wroteHeader bool
closed bool
buf [10]byte
compressor *flate.Writer
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
err error
}
// NewWriter returns a new [Writer].
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the [Writer] when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write, Flush, or Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevel(w, DefaultCompression)
return z
}
// NewWriterLevel is like [NewWriter] but specifies the compression level instead
// of assuming [DefaultCompression].
//
// The compression level can be [DefaultCompression], [NoCompression], [HuffmanOnly]
// or any integer value between [BestSpeed] and [BestCompression] inclusive.
// The error returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
if level < HuffmanOnly || level > BestCompression {
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
}
z := new(Writer)
z.init(w, level)
return z, nil
}
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if compressor != nil {
compressor.Reset(w)
}
*z = Writer{
Header: Header{
OS: 255, // unknown
},
w: w,
level: level,
compressor: compressor,
}
}
// Reset discards the [Writer] z's state and makes it equivalent to the
// result of its original state from [NewWriter] or [NewWriterLevel], but
// writing to w instead. This permits reusing a [Writer] rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
z.init(w, z.level)
}
// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
return errors.New("gzip.Write: Extra data is too large")
}
le.PutUint16(z.buf[:2], uint16(len(b)))
_, err := z.w.Write(z.buf[:2])
if err != nil {
return err
}
_, err = z.w.Write(b)
return err
}
// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
needconv := false
for _, v := range s {
if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-Latin-1 header string")
}
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
}
if err != nil {
return err
}
// GZIP strings are NUL-terminated.
z.buf[0] = 0
_, err = z.w.Write(z.buf[:1])
return err
}
// Write writes a compressed form of p to the underlying [io.Writer]. The
// compressed bytes are not necessarily flushed until the [Writer] is closed.
func (z *Writer) Write(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
// Write the GZIP header lazily.
if !z.wroteHeader {
z.wroteHeader = true
z.buf = [10]byte{0: gzipID1, 1: gzipID2, 2: gzipDeflate}
if z.Extra != nil {
z.buf[3] |= 0x04
}
if z.Name != "" {
z.buf[3] |= 0x08
}
if z.Comment != "" {
z.buf[3] |= 0x10
}
if z.ModTime.After(time.Unix(0, 0)) {
// Section 2.3.1, the zero value for MTIME means that the
// modified time is not set.
le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
}
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
z.buf[8] = 4
}
z.buf[9] = z.OS
_, z.err = z.w.Write(z.buf[:10])
if z.err != nil {
return 0, z.err
}
if z.Extra != nil {
z.err = z.writeBytes(z.Extra)
if z.err != nil {
return 0, z.err
}
}
if z.Name != "" {
z.err = z.writeString(z.Name)
if z.err != nil {
return 0, z.err
}
}
if z.Comment != "" {
z.err = z.writeString(z.Comment)
if z.err != nil {
return 0, z.err
}
}
if z.compressor == nil {
z.compressor, _ = flate.NewWriter(z.w, z.level)
}
}
z.size += uint32(len(p))
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
n, z.err = z.compressor.Write(p)
return n, z.err
}
// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the [Writer] by flushing any unwritten data to the underlying
// [io.Writer] and writing the GZIP footer.
// It does not close the underlying [io.Writer].
func (z *Writer) Close() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
z.closed = true
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Close()
if z.err != nil {
return z.err
}
le.PutUint32(z.buf[:4], z.digest)
le.PutUint32(z.buf[4:8], z.size)
_, z.err = z.w.Write(z.buf[:8])
return z.err
}
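// Illustrative usage sketch (editor's addition, not part of the original
// file): compressing a buffer with header metadata set before the first
// Write. Assumes the standard bytes and time packages; the function name
// and field values are hypothetical.
//
//	func gzipWithHeader(data []byte) ([]byte, error) {
//		var buf bytes.Buffer
//		zw := gzip.NewWriter(&buf)
//		// Header fields must be set before the first Write, Flush, or Close.
//		zw.Name = "data.txt"
//		zw.Comment = "illustrative stream"
//		zw.ModTime = time.Unix(1e8, 0)
//		if _, err := zw.Write(data); err != nil {
//			return nil, err
//		}
//		// Close flushes the compressor and writes the CRC-32/size footer.
//		if err := zw.Close(); err != nil {
//			return nil, err
//		}
//		return buf.Bytes(), nil
//	}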


@@ -0,0 +1,280 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"bufio"
"bytes"
"io"
"reflect"
"testing"
"time"
)
// TestEmpty tests that an empty payload still forms a valid GZIP stream.
func TestEmpty(t *testing.T) {
buf := new(bytes.Buffer)
if err := NewWriter(buf).Close(); err != nil {
t.Fatalf("Writer.Close: %v", err)
}
r, err := NewReader(buf)
if err != nil {
t.Fatalf("NewReader: %v", err)
}
if want := (Header{OS: 255}); !reflect.DeepEqual(r.Header, want) {
t.Errorf("Header mismatch:\ngot %#v\nwant %#v", r.Header, want)
}
b, err := io.ReadAll(r)
if err != nil {
t.Fatalf("ReadAll: %v", err)
}
if len(b) != 0 {
t.Fatalf("got %d bytes, want 0", len(b))
}
if err := r.Close(); err != nil {
t.Fatalf("Reader.Close: %v", err)
}
}
// TestRoundTrip tests that gzipping and then gunzipping is the identity
// function.
func TestRoundTrip(t *testing.T) {
buf := new(bytes.Buffer)
w := NewWriter(buf)
w.Comment = "comment"
w.Extra = []byte("extra")
w.ModTime = time.Unix(1e8, 0)
w.Name = "name"
if _, err := w.Write([]byte("payload")); err != nil {
t.Fatalf("Write: %v", err)
}
if err := w.Close(); err != nil {
t.Fatalf("Writer.Close: %v", err)
}
r, err := NewReader(buf)
if err != nil {
t.Fatalf("NewReader: %v", err)
}
b, err := io.ReadAll(r)
if err != nil {
t.Fatalf("ReadAll: %v", err)
}
if string(b) != "payload" {
t.Fatalf("payload is %q, want %q", string(b), "payload")
}
if r.Comment != "comment" {
t.Fatalf("comment is %q, want %q", r.Comment, "comment")
}
if string(r.Extra) != "extra" {
t.Fatalf("extra is %q, want %q", r.Extra, "extra")
}
if r.ModTime.Unix() != 1e8 {
t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8))
}
if r.Name != "name" {
t.Fatalf("name is %q, want %q", r.Name, "name")
}
if err := r.Close(); err != nil {
t.Fatalf("Reader.Close: %v", err)
}
}
// TestLatin1 tests the internal functions for converting to and from Latin-1.
func TestLatin1(t *testing.T) {
latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0}
utf8 := "Äußerung"
z := Reader{r: bufio.NewReader(bytes.NewReader(latin1))}
s, err := z.readString()
if err != nil {
t.Fatalf("readString: %v", err)
}
if s != utf8 {
t.Fatalf("read latin-1: got %q, want %q", s, utf8)
}
buf := bytes.NewBuffer(make([]byte, 0, len(latin1)))
c := Writer{w: buf}
if err = c.writeString(utf8); err != nil {
t.Fatalf("writeString: %v", err)
}
s = buf.String()
if s != string(latin1) {
t.Fatalf("write utf-8: got %q, want %q", s, string(latin1))
}
}
// TestLatin1RoundTrip tests that metadata that is representable in Latin-1
// survives a round trip.
func TestLatin1RoundTrip(t *testing.T) {
testCases := []struct {
name string
ok bool
}{
{"", true},
{"ASCII is OK", true},
{"unless it contains a NUL\x00", false},
{"no matter where \x00 occurs", false},
{"\x00\x00\x00", false},
{"Látin-1 also passes (U+00E1)", true},
{"but LĀtin Extended-A (U+0100) does not", false},
{"neither does 日本語", false},
{"invalid UTF-8 also \xffails", false},
{"\x00 as does Látin-1 with NUL", false},
}
for _, tc := range testCases {
buf := new(bytes.Buffer)
w := NewWriter(buf)
w.Name = tc.name
err := w.Close()
if (err == nil) != tc.ok {
t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err)
continue
}
if !tc.ok {
continue
}
r, err := NewReader(buf)
if err != nil {
t.Errorf("NewReader: %v", err)
continue
}
_, err = io.ReadAll(r)
if err != nil {
t.Errorf("ReadAll: %v", err)
continue
}
if r.Name != tc.name {
t.Errorf("name is %q, want %q", r.Name, tc.name)
continue
}
if err := r.Close(); err != nil {
t.Errorf("Reader.Close: %v", err)
continue
}
}
}
func TestWriterFlush(t *testing.T) {
buf := new(bytes.Buffer)
w := NewWriter(buf)
w.Comment = "comment"
w.Extra = []byte("extra")
w.ModTime = time.Unix(1e8, 0)
w.Name = "name"
n0 := buf.Len()
if n0 != 0 {
t.Fatalf("buffer size = %d before writes; want 0", n0)
}
if err := w.Flush(); err != nil {
t.Fatal(err)
}
n1 := buf.Len()
if n1 == 0 {
t.Fatal("no data after first flush")
}
w.Write([]byte("x"))
n2 := buf.Len()
if n1 != n2 {
t.Fatalf("after writing a single byte, size changed from %d to %d; want no change", n1, n2)
}
if err := w.Flush(); err != nil {
t.Fatal(err)
}
n3 := buf.Len()
if n2 == n3 {
t.Fatal("Flush didn't flush any data")
}
if err := w.Close(); err != nil {
t.Fatal(err)
}
}
// Multiple gzip files concatenated form a valid gzip file.
func TestConcat(t *testing.T) {
var buf bytes.Buffer
w := NewWriter(&buf)
w.Write([]byte("hello "))
w.Close()
w = NewWriter(&buf)
w.Write([]byte("world\n"))
w.Close()
r, err := NewReader(&buf)
if err != nil {
t.Fatal(err)
}
data, err := io.ReadAll(r)
if string(data) != "hello world\n" || err != nil {
t.Fatalf("ReadAll = %q, %v, want %q, nil", data, err, "hello world")
}
}
func TestWriterReset(t *testing.T) {
buf := new(bytes.Buffer)
buf2 := new(bytes.Buffer)
z := NewWriter(buf)
msg := []byte("hello world")
z.Write(msg)
z.Close()
z.Reset(buf2)
z.Write(msg)
z.Close()
if buf.String() != buf2.String() {
t.Errorf("buf2 %q != original buf of %q", buf2.String(), buf.String())
}
}
type limitedWriter struct {
N int
}
func (l *limitedWriter) Write(p []byte) (n int, err error) {
if n := l.N; n < len(p) {
l.N = 0
return n, io.ErrShortWrite
}
l.N -= len(p)
return len(p), nil
}
// Write should never return more bytes than the input slice.
func TestLimitedWrite(t *testing.T) {
msg := []byte("a")
for lim := 2; lim < 20; lim++ {
z := NewWriter(&limitedWriter{lim})
if n, _ := z.Write(msg); n > len(msg) {
t.Errorf("Write() = %d, want %d or less", n, len(msg))
}
z.Reset(&limitedWriter{lim})
z.Header = Header{
Comment: "comment",
Extra: []byte("extra"),
ModTime: time.Now(),
Name: "name",
OS: 1,
}
if n, _ := z.Write(msg); n > len(msg) {
t.Errorf("Write() = %d, want %d or less", n, len(msg))
}
}
}


@@ -0,0 +1,81 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"internal/testenv"
"io/fs"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
)
// TestGZIPFilesHaveZeroMTimes checks that every .gz file in the tree
// has a zero MTIME. This is a requirement for the Debian maintainers
// to be able to have deterministic packages.
//
// To patch a .gz file, use the following command:
//
// $ dd if=/dev/zero bs=1 seek=4 count=4 conv=notrunc of=filename.gz
//
// See https://golang.org/issue/14937.
func TestGZIPFilesHaveZeroMTimes(t *testing.T) {
// To avoid spurious false positives due to untracked GZIP files that
// may be in the user's GOROOT (Issue 18604), we only run this test on
// the builders, which should have a clean checkout of the tree.
if testenv.Builder() == "" {
t.Skip("skipping test on non-builder")
}
if !testenv.HasSrc() {
t.Skip("skipping; no GOROOT available")
}
goroot, err := filepath.EvalSymlinks(runtime.GOROOT())
if err != nil {
t.Fatal("error evaluating GOROOT: ", err)
}
var files []string
err = filepath.WalkDir(goroot, func(path string, info fs.DirEntry, err error) error {
if err != nil {
return err
}
if !info.IsDir() && strings.HasSuffix(path, ".gz") {
files = append(files, path)
}
return nil
})
if err != nil {
if os.IsNotExist(err) {
t.Skipf("skipping: GOROOT directory not found: %s", runtime.GOROOT())
}
t.Fatal("error collecting list of .gz files in GOROOT: ", err)
}
if len(files) == 0 {
t.Fatal("expected to find some .gz files under GOROOT")
}
for _, path := range files {
checkZeroMTime(t, path)
}
}
func checkZeroMTime(t *testing.T, path string) {
f, err := os.Open(path)
if err != nil {
t.Error(err)
return
}
defer f.Close()
gz, err := NewReader(f)
if err != nil {
t.Errorf("cannot read gzip file %s: %s", path, err)
return
}
defer gz.Close()
if !gz.ModTime.IsZero() {
t.Errorf("gzip file %s has non-zero mtime (%s)", path, gz.ModTime)
}
}
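// Illustrative Go equivalent of the dd command quoted in the test's doc
// comment (editor's addition, not part of the original file), assuming the
// standard os package; the function name is hypothetical.
//
//	func zeroGzipMTime(path string) error {
//		f, err := os.OpenFile(path, os.O_RDWR, 0)
//		if err != nil {
//			return err
//		}
//		// MTIME occupies bytes 4 through 7 of the gzip header (RFC 1952).
//		if _, err := f.WriteAt(make([]byte, 4), 4); err != nil {
//			f.Close()
//			return err
//		}
//		return f.Close()
//	}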

File diff suppressed because one or more lines are too long

src/compress/lzw/reader.go Normal file

@@ -0,0 +1,290 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lzw implements the Lempel-Ziv-Welch compressed data format,
// described in T. A. Welch, “A Technique for High-Performance Data
// Compression”, Computer, 17(6) (June 1984), pp 8-19.
//
// In particular, it implements LZW as used by the GIF and PDF file
// formats, which means variable-width codes up to 12 bits and the first
// two non-literal codes are a clear code and an EOF code.
//
// The TIFF file format uses a similar but incompatible version of the LZW
// algorithm. See the golang.org/x/image/tiff/lzw package for an
// implementation.
package lzw
// TODO(nigeltao): check that PDF uses LZW in the same way as GIF,
// modulo LSB/MSB packing order.
import (
"bufio"
"errors"
"fmt"
"io"
)
// Order specifies the bit ordering in an LZW data stream.
type Order int
const (
// LSB means Least Significant Bits first, as used in the GIF file format.
LSB Order = iota
// MSB means Most Significant Bits first, as used in the TIFF and PDF
// file formats.
MSB
)
const (
maxWidth = 12
decoderInvalidCode = 0xffff
flushBuffer = 1 << maxWidth
)
// Reader is an io.Reader which can be used to read compressed data in the
// LZW format.
type Reader struct {
r io.ByteReader
bits uint32
nBits uint
width uint
read func(*Reader) (uint16, error) // readLSB or readMSB
litWidth int // width in bits of literal codes
err error
// The first 1<<litWidth codes are literal codes.
// The next two codes mean clear and EOF.
// Other valid codes are in the range [lo, hi] where lo := clear + 2,
// with the upper bound incrementing on each code seen.
//
// overflow is the code at which hi overflows the code width. It always
// equals 1 << width.
//
// last is the most recently seen code, or decoderInvalidCode.
//
// An invariant is that hi < overflow.
clear, eof, hi, overflow, last uint16
// Each code c in [lo, hi] expands to two or more bytes. For c != hi:
// suffix[c] is the last of these bytes.
// prefix[c] is the code for all but the last byte.
// This code can either be a literal code or another code in [lo, c).
// The c == hi case is a special case.
suffix [1 << maxWidth]uint8
prefix [1 << maxWidth]uint16
// output is the temporary output buffer.
// Literal codes are accumulated from the start of the buffer.
// Non-literal codes decode to a sequence of suffixes that are first
// written right-to-left from the end of the buffer before being copied
// to the start of the buffer.
// It is flushed when it contains >= 1<<maxWidth bytes,
// so that there is always room to decode an entire code.
output [2 * 1 << maxWidth]byte
o int // write index into output
toRead []byte // bytes to return from Read
}
// readLSB returns the next code for "Least Significant Bits first" data.
func (r *Reader) readLSB() (uint16, error) {
for r.nBits < r.width {
x, err := r.r.ReadByte()
if err != nil {
return 0, err
}
r.bits |= uint32(x) << r.nBits
r.nBits += 8
}
code := uint16(r.bits & (1<<r.width - 1))
r.bits >>= r.width
r.nBits -= r.width
return code, nil
}
// readMSB returns the next code for "Most Significant Bits first" data.
func (r *Reader) readMSB() (uint16, error) {
for r.nBits < r.width {
x, err := r.r.ReadByte()
if err != nil {
return 0, err
}
r.bits |= uint32(x) << (24 - r.nBits)
r.nBits += 8
}
code := uint16(r.bits >> (32 - r.width))
r.bits <<= r.width
r.nBits -= r.width
return code, nil
}
// Read implements io.Reader, reading uncompressed bytes from its underlying [Reader].
func (r *Reader) Read(b []byte) (int, error) {
for {
if len(r.toRead) > 0 {
n := copy(b, r.toRead)
r.toRead = r.toRead[n:]
return n, nil
}
if r.err != nil {
return 0, r.err
}
r.decode()
}
}
// decode decompresses bytes from r.r and leaves them in r.toRead.
// r.read specifies how to decode bytes into codes.
// r.litWidth is the width in bits of literal codes.
func (r *Reader) decode() {
// Loop over the code stream, converting codes into decompressed bytes.
loop:
for {
code, err := r.read(r)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
break
}
switch {
case code < r.clear:
// We have a literal code.
r.output[r.o] = uint8(code)
r.o++
if r.last != decoderInvalidCode {
// Save what the hi code expands to.
r.suffix[r.hi] = uint8(code)
r.prefix[r.hi] = r.last
}
case code == r.clear:
r.width = 1 + uint(r.litWidth)
r.hi = r.eof
r.overflow = 1 << r.width
r.last = decoderInvalidCode
continue
case code == r.eof:
r.err = io.EOF
break loop
case code <= r.hi:
c, i := code, len(r.output)-1
if code == r.hi && r.last != decoderInvalidCode {
// code == hi is a special case which expands to the last expansion
// followed by the head of the last expansion. To find the head, we walk
// the prefix chain until we find a literal code.
c = r.last
for c >= r.clear {
c = r.prefix[c]
}
r.output[i] = uint8(c)
i--
c = r.last
}
// Copy the suffix chain into output and then write that to w.
for c >= r.clear {
r.output[i] = r.suffix[c]
i--
c = r.prefix[c]
}
r.output[i] = uint8(c)
r.o += copy(r.output[r.o:], r.output[i:])
if r.last != decoderInvalidCode {
// Save what the hi code expands to.
r.suffix[r.hi] = uint8(c)
r.prefix[r.hi] = r.last
}
default:
r.err = errors.New("lzw: invalid code")
break loop
}
r.last, r.hi = code, r.hi+1
if r.hi >= r.overflow {
if r.hi > r.overflow {
panic("unreachable")
}
if r.width == maxWidth {
r.last = decoderInvalidCode
// Undo the r.hi++ a few lines above, so that (1) we maintain
// the invariant that r.hi < r.overflow, and (2) r.hi does not
// eventually overflow a uint16.
r.hi--
} else {
r.width++
r.overflow = 1 << r.width
}
}
if r.o >= flushBuffer {
break
}
}
// Flush pending output.
r.toRead = r.output[:r.o]
r.o = 0
}
var errClosed = errors.New("lzw: reader/writer is closed")
// Close closes the [Reader] and returns an error for any future read operation.
// It does not close the underlying [io.Reader].
func (r *Reader) Close() error {
r.err = errClosed // in case any Reads come along
return nil
}
// Reset clears the [Reader]'s state and allows it to be reused again
// as a new [Reader].
func (r *Reader) Reset(src io.Reader, order Order, litWidth int) {
*r = Reader{}
r.init(src, order, litWidth)
}
// NewReader creates a new [io.ReadCloser].
// Reads from the returned [io.ReadCloser] read and decompress data from r.
// If r does not also implement [io.ByteReader],
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when
// finished reading.
// The number of bits to use for literal codes, litWidth, must be in the
// range [2,8] and is typically 8. It must equal the litWidth
// used during compression.
//
// It is guaranteed that the underlying type of the returned [io.ReadCloser]
// is a *[Reader].
func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
return newReader(r, order, litWidth)
}
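// Illustrative usage sketch (editor's addition, not part of the original
// file): decoding one of the LSB, litWidth=8 vectors from reader_test.go.
// Assumes the standard fmt, io, log, and strings packages; the function
// name is hypothetical.
//
//	func decodeTOBE() {
//		const compressed = "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27" +
//			"\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01"
//		r := lzw.NewReader(strings.NewReader(compressed), lzw.LSB, 8)
//		defer r.Close()
//		out, err := io.ReadAll(r)
//		if err != nil {
//			log.Fatal(err)
//		}
//		fmt.Println(string(out)) // prints TOBEORNOTTOBEORTOBEORNOT
//	}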
func newReader(src io.Reader, order Order, litWidth int) *Reader {
r := new(Reader)
r.init(src, order, litWidth)
return r
}
func (r *Reader) init(src io.Reader, order Order, litWidth int) {
switch order {
case LSB:
r.read = (*Reader).readLSB
case MSB:
r.read = (*Reader).readMSB
default:
r.err = errors.New("lzw: unknown order")
return
}
if litWidth < 2 || 8 < litWidth {
r.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
return
}
br, ok := src.(io.ByteReader)
if !ok && src != nil {
br = bufio.NewReader(src)
}
r.r = br
r.litWidth = litWidth
r.width = 1 + uint(litWidth)
r.clear = uint16(1) << uint(litWidth)
r.eof, r.hi = r.clear+1, r.clear+1
r.overflow = uint16(1) << r.width
r.last = decoderInvalidCode
}


@@ -0,0 +1,313 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzw
import (
"bytes"
"fmt"
"io"
"math"
"os"
"runtime"
"strconv"
"strings"
"testing"
)
type lzwTest struct {
desc string
raw string
compressed string
err error
}
var lzwTests = []lzwTest{
{
"empty;LSB;8",
"",
"\x01\x01",
nil,
},
{
"empty;MSB;8",
"",
"\x80\x80",
nil,
},
{
"tobe;LSB;7",
"TOBEORNOTTOBEORTOBEORNOT",
"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
nil,
},
{
"tobe;LSB;8",
"TOBEORNOTTOBEORTOBEORNOT",
"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01",
nil,
},
{
"tobe;MSB;7",
"TOBEORNOTTOBEORTOBEORNOT",
"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
nil,
},
{
"tobe;MSB;8",
"TOBEORNOTTOBEORTOBEORNOT",
"\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80",
nil,
},
{
"tobe-truncated;LSB;8",
"TOBEORNOTTOBEORTOBEORNOT",
"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04",
io.ErrUnexpectedEOF,
},
// This example comes from https://en.wikipedia.org/wiki/Graphics_Interchange_Format.
{
"gif;LSB;8",
"\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
"\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01",
nil,
},
// This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
{
"pdf;MSB;8",
"-----A---B",
"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01",
nil,
},
}
func TestReader(t *testing.T) {
var b bytes.Buffer
for _, tt := range lzwTests {
d := strings.Split(tt.desc, ";")
var order Order
switch d[1] {
case "LSB":
order = LSB
case "MSB":
order = MSB
default:
t.Errorf("%s: bad order %q", tt.desc, d[1])
}
litWidth, _ := strconv.Atoi(d[2])
rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
defer rc.Close()
b.Reset()
n, err := io.Copy(&b, rc)
s := b.String()
if err != nil {
if err != tt.err {
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
}
if err == io.ErrUnexpectedEOF {
// Even if the input is truncated, we should still return the
// partial decoded result.
if n == 0 || !strings.HasPrefix(tt.raw, s) {
t.Errorf("got %d bytes (%q), want a non-empty prefix of %q", n, s, tt.raw)
}
}
continue
}
if s != tt.raw {
t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
}
}
}
func TestReaderReset(t *testing.T) {
var b bytes.Buffer
for _, tt := range lzwTests {
d := strings.Split(tt.desc, ";")
var order Order
switch d[1] {
case "LSB":
order = LSB
case "MSB":
order = MSB
default:
t.Errorf("%s: bad order %q", tt.desc, d[1])
}
litWidth, _ := strconv.Atoi(d[2])
rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
defer rc.Close()
b.Reset()
n, err := io.Copy(&b, rc)
b1 := b.Bytes()
if err != nil {
if err != tt.err {
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
}
if err == io.ErrUnexpectedEOF {
// Even if the input is truncated, we should still return the
// partial decoded result.
if n == 0 || !strings.HasPrefix(tt.raw, b.String()) {
t.Errorf("got %d bytes (%q), want a non-empty prefix of %q", n, b.String(), tt.raw)
}
}
continue
}
b.Reset()
rc.(*Reader).Reset(strings.NewReader(tt.compressed), order, litWidth)
n, err = io.Copy(&b, rc)
b2 := b.Bytes()
if err != nil {
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, nil)
continue
}
if !bytes.Equal(b1, b2) {
t.Errorf("bytes read were not the same")
}
}
}
type devZero struct{}
func (devZero) Read(p []byte) (int, error) {
clear(p)
return len(p), nil
}
func TestHiCodeDoesNotOverflow(t *testing.T) {
r := NewReader(devZero{}, LSB, 8)
d := r.(*Reader)
buf := make([]byte, 1024)
oldHi := uint16(0)
for i := 0; i < 100; i++ {
if _, err := io.ReadFull(r, buf); err != nil {
t.Fatalf("i=%d: %v", i, err)
}
// The hi code should never decrease.
if d.hi < oldHi {
t.Fatalf("i=%d: hi=%d decreased from previous value %d", i, d.hi, oldHi)
}
oldHi = d.hi
}
}
// TestNoLongerSavingPriorExpansions tests the decoder state when codes other
// than clear codes continue to be seen after decoder.hi and decoder.width
// reach their maximum values (4095 and 12), i.e. after we no longer save prior
// expansions. In particular, it tests seeing the highest possible code, 4095.
func TestNoLongerSavingPriorExpansions(t *testing.T) {
// Iterations is used to calculate how many input bits are needed to get
// the decoder.hi and decoder.width values up to their maximum.
iterations := []struct {
width, n int
}{
// The final term is 257, not 256, as NewReader initializes d.hi to
// d.clear+1 and the clear code is 256.
{9, 512 - 257},
{10, 1024 - 512},
{11, 2048 - 1024},
{12, 4096 - 2048},
}
nCodes, nBits := 0, 0
for _, e := range iterations {
nCodes += e.n
nBits += e.n * e.width
}
if nCodes != 3839 {
t.Fatalf("nCodes: got %v, want %v", nCodes, 3839)
}
if nBits != 43255 {
t.Fatalf("nBits: got %v, want %v", nBits, 43255)
}
// Construct our input of 43255 zero bits (which gets d.hi and d.width up
// to 4095 and 12), followed by 0xfff (4095) as 12 bits, followed by 0x101
// (EOF) as 12 bits.
//
// 43255 = 5406*8 + 7, and codes are read in LSB order. The final bytes are
// therefore:
//
// xwwwwwww xxxxxxxx yyyyyxxx zyyyyyyy
// 10000000 11111111 00001111 00001000
//
// or split out:
//
// .0000000 ........ ........ ........ w = 0x000
// 1....... 11111111 .....111 ........ x = 0xfff
// ........ ........ 00001... .0001000 y = 0x101
//
// The 12 'w' bits (not all are shown) form the 3839'th code, with value
// 0x000. Just after decoder.read returns that code, d.hi == 4095 and
// d.last == 0.
//
// The 12 'x' bits form the 3840'th code, with value 0xfff or 4095. Just
// after decoder.read returns that code, d.hi == 4095 and d.last ==
// decoderInvalidCode.
//
// The 12 'y' bits form the 3841'st code, with value 0x101, the EOF code.
//
// The 'z' bit is unused.
in := make([]byte, 5406)
in = append(in, 0x80, 0xff, 0x0f, 0x08)
r := NewReader(bytes.NewReader(in), LSB, 8)
nDecoded, err := io.Copy(io.Discard, r)
if err != nil {
t.Fatalf("Copy: %v", err)
}
// nDecoded should be 3841: 3839 literal codes and then 2 decoded bytes
// from 1 non-literal code. The EOF code contributes 0 decoded bytes.
if nDecoded != int64(nCodes+2) {
t.Fatalf("nDecoded: got %v, want %v", nDecoded, nCodes+2)
}
}
func BenchmarkDecoder(b *testing.B) {
buf, err := os.ReadFile("../testdata/e.txt")
if err != nil {
b.Fatal(err)
}
if len(buf) == 0 {
b.Fatalf("test file has no data")
}
getInputBuf := func(buf []byte, n int) []byte {
compressed := new(bytes.Buffer)
w := NewWriter(compressed, LSB, 8)
for i := 0; i < n; i += len(buf) {
if len(buf) > n-i {
buf = buf[:n-i]
}
w.Write(buf)
}
w.Close()
return compressed.Bytes()
}
for e := 4; e <= 6; e++ {
n := int(math.Pow10(e))
b.Run(fmt.Sprint("1e", e), func(b *testing.B) {
b.StopTimer()
b.SetBytes(int64(n))
buf1 := getInputBuf(buf, n)
runtime.GC()
b.StartTimer()
for i := 0; i < b.N; i++ {
io.Copy(io.Discard, NewReader(bytes.NewReader(buf1), LSB, 8))
}
})
b.Run(fmt.Sprint("1e-Reuse", e), func(b *testing.B) {
b.StopTimer()
b.SetBytes(int64(n))
buf1 := getInputBuf(buf, n)
runtime.GC()
b.StartTimer()
r := NewReader(bytes.NewReader(buf1), LSB, 8)
for i := 0; i < b.N; i++ {
io.Copy(io.Discard, r)
r.Close()
r.(*Reader).Reset(bytes.NewReader(buf1), LSB, 8)
}
})
}
}

src/compress/lzw/writer.go Normal file

@@ -0,0 +1,293 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzw
import (
"bufio"
"errors"
"fmt"
"io"
)
// A writer is a buffered, flushable writer.
type writer interface {
io.ByteWriter
Flush() error
}
const (
// A code is a 12 bit value, stored as a uint32 when encoding to avoid
// type conversions when shifting bits.
maxCode = 1<<12 - 1
invalidCode = 1<<32 - 1
// There are 1<<12 possible codes, which is an upper bound on the number of
// valid hash table entries at any given point in time. tableSize is 4x that.
tableSize = 4 * 1 << 12
tableMask = tableSize - 1
// A hash table entry is a uint32. Zero is an invalid entry since the
// lower 12 bits of a valid entry must be a non-literal code.
invalidEntry = 0
)
// Writer is an LZW compressor. It writes the compressed form of the data
// to an underlying writer (see [NewWriter]).
type Writer struct {
// w is the writer that compressed bytes are written to.
w writer
// litWidth is the width in bits of literal codes.
litWidth uint
// order, write, bits, nBits and width are the state for
// converting a code stream into a byte stream.
order Order
write func(*Writer, uint32) error
nBits uint
width uint
bits uint32
// hi is the code implied by the next code emission.
// overflow is the code at which hi overflows the code width.
hi, overflow uint32
// savedCode is the accumulated code at the end of the most recent Write
// call. It is equal to invalidCode if there was no such call.
savedCode uint32
// err is the first error encountered during writing. Closing the writer
// will make any future Write calls return errClosed.
err error
// table is the hash table from 20-bit keys to 12-bit values. Each table
// entry contains key<<12|val and collisions resolve by linear probing.
// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
// The values are a 12-bit code.
table [tableSize]uint32
}
// writeLSB writes the code c for "Least Significant Bits first" data.
func (w *Writer) writeLSB(c uint32) error {
w.bits |= c << w.nBits
w.nBits += w.width
for w.nBits >= 8 {
if err := w.w.WriteByte(uint8(w.bits)); err != nil {
return err
}
w.bits >>= 8
w.nBits -= 8
}
return nil
}
// writeMSB writes the code c for "Most Significant Bits first" data.
func (w *Writer) writeMSB(c uint32) error {
w.bits |= c << (32 - w.width - w.nBits)
w.nBits += w.width
for w.nBits >= 8 {
if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil {
return err
}
w.bits <<= 8
w.nBits -= 8
}
return nil
}
// errOutOfCodes is an internal error that means that the writer has run out
// of unused codes and a clear code needs to be sent next.
var errOutOfCodes = errors.New("lzw: out of codes")
// incHi increments w.hi and checks for both overflow and running out of
// unused codes. In the latter case, incHi sends a clear code, resets the
// writer state and returns errOutOfCodes.
func (w *Writer) incHi() error {
w.hi++
if w.hi == w.overflow {
w.width++
w.overflow <<= 1
}
if w.hi == maxCode {
clear := uint32(1) << w.litWidth
if err := w.write(w, clear); err != nil {
return err
}
w.width = w.litWidth + 1
w.hi = clear + 1
w.overflow = clear << 1
for i := range w.table {
w.table[i] = invalidEntry
}
return errOutOfCodes
}
return nil
}
// Write writes a compressed representation of p to w's underlying writer.
func (w *Writer) Write(p []byte) (n int, err error) {
if w.err != nil {
return 0, w.err
}
if len(p) == 0 {
return 0, nil
}
if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff {
for _, x := range p {
if x > maxLit {
w.err = errors.New("lzw: input byte too large for the litWidth")
return 0, w.err
}
}
}
n = len(p)
code := w.savedCode
if code == invalidCode {
// This is the first write; send a clear code.
// https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
// "Variable-Length-Code LZW Compression" says that "Encoders should
// output a Clear code as the first code of each image data stream".
//
// LZW compression isn't only used by GIF, but it's cheap to follow
// that directive unconditionally.
clear := uint32(1) << w.litWidth
if err := w.write(w, clear); err != nil {
return 0, err
}
// After the starting clear code, the next code sent (for non-empty
// input) is always a literal code.
code, p = uint32(p[0]), p[1:]
}
loop:
for _, x := range p {
literal := uint32(x)
key := code<<8 | literal
// If there is a hash table hit for this key then we continue the loop
// and do not emit a code yet.
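// The hash folds the 20-bit key into the table's index space (tableSize
// is 4<<12 entries in this package); colliding keys fall through to the
// linear probe below.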
hash := (key>>12 ^ key) & tableMask
for h, t := hash, w.table[hash]; t != invalidEntry; {
if key == t>>12 {
code = t & maxCode
continue loop
}
h = (h + 1) & tableMask
t = w.table[h]
}
// Otherwise, write the current code, and literal becomes the start of
// the next emitted code.
if w.err = w.write(w, code); w.err != nil {
return 0, w.err
}
code = literal
// Increment w.hi, the next implied code. If we run out of codes, reset
// the writer state (including clearing the hash table) and continue.
if err1 := w.incHi(); err1 != nil {
if err1 == errOutOfCodes {
continue
}
w.err = err1
return 0, w.err
}
// Otherwise, insert key -> w.hi into the map that w.table represents.
for {
if w.table[hash] == invalidEntry {
w.table[hash] = (key << 12) | w.hi
break
}
hash = (hash + 1) & tableMask
}
}
w.savedCode = code
return n, nil
}
// Close closes the [Writer], flushing any pending output. It does not close
// w's underlying writer.
func (w *Writer) Close() error {
if w.err != nil {
if w.err == errClosed {
return nil
}
return w.err
}
// Make any future calls to Write return errClosed.
w.err = errClosed
// Write the savedCode if valid.
if w.savedCode != invalidCode {
if err := w.write(w, w.savedCode); err != nil {
return err
}
if err := w.incHi(); err != nil && err != errOutOfCodes {
return err
}
} else {
// Write the starting clear code, as w.Write did not.
clear := uint32(1) << w.litWidth
if err := w.write(w, clear); err != nil {
return err
}
}
// Write the eof code.
eof := uint32(1)<<w.litWidth + 1
if err := w.write(w, eof); err != nil {
return err
}
// Write the final bits.
if w.nBits > 0 {
if w.order == MSB {
w.bits >>= 24
}
if err := w.w.WriteByte(uint8(w.bits)); err != nil {
return err
}
}
return w.w.Flush()
}
// Reset clears the [Writer]'s state and allows it to be reused again
// as a new [Writer].
func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) {
*w = Writer{}
w.init(dst, order, litWidth)
}
// NewWriter creates a new [io.WriteCloser].
// Writes to the returned [io.WriteCloser] are compressed and written to w.
// It is the caller's responsibility to call Close on the WriteCloser when
// finished writing.
// The number of bits to use for literal codes, litWidth, must be in the
// range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
//
// It is guaranteed that the underlying type of the returned [io.WriteCloser]
// is a *[Writer].
func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
return newWriter(w, order, litWidth)
}
func newWriter(dst io.Writer, order Order, litWidth int) *Writer {
w := new(Writer)
w.init(dst, order, litWidth)
return w
}
func (w *Writer) init(dst io.Writer, order Order, litWidth int) {
switch order {
case LSB:
w.write = (*Writer).writeLSB
case MSB:
w.write = (*Writer).writeMSB
default:
w.err = errors.New("lzw: unknown order")
return
}
if litWidth < 2 || 8 < litWidth {
w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
return
}
bw, ok := dst.(writer)
if !ok && dst != nil {
bw = bufio.NewWriter(dst)
}
w.w = bw
lw := uint(litWidth)
w.order = order
w.width = 1 + lw
w.litWidth = lw
w.hi = 1<<lw + 1
w.overflow = 1 << (lw + 1)
w.savedCode = invalidCode
}
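
The writer is normally obtained through NewWriter above. A minimal round trip (an added sketch, not part of this file) that compresses and then decompresses a byte slice with the LSB bit order used by GIF:

package main

import (
	"bytes"
	"compress/lzw"
	"fmt"
	"io"
	"log"
)

func main() {
	var buf bytes.Buffer
	w := lzw.NewWriter(&buf, lzw.LSB, 8)
	if _, err := w.Write([]byte("hello, hello, hello")); err != nil {
		log.Fatal(err)
	}
	// Close flushes any pending bits and writes the EOF code.
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	r := lzw.NewReader(&buf, lzw.LSB, 8)
	defer r.Close()
	out, err := io.ReadAll(r)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s\n", out)
}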


238
src/compress/lzw/writer_test.go Normal file

@@ -0,0 +1,238 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzw
import (
"bytes"
"fmt"
"internal/testenv"
"io"
"math"
"os"
"runtime"
"testing"
)
var filenames = []string{
"../testdata/gettysburg.txt",
"../testdata/e.txt",
"../testdata/pi.txt",
}
// testFile tests that compressing and then decompressing the given file with
// the given options yields equivalent bytes to the original file.
func testFile(t *testing.T, fn string, order Order, litWidth int) {
// Read the file, as golden output.
golden, err := os.Open(fn)
if err != nil {
t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err)
return
}
defer golden.Close()
// Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end.
raw, err := os.Open(fn)
if err != nil {
t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err)
return
}
piper, pipew := io.Pipe()
defer piper.Close()
go func() {
defer raw.Close()
defer pipew.Close()
lzww := NewWriter(pipew, order, litWidth)
defer lzww.Close()
var b [4096]byte
for {
n, err0 := raw.Read(b[:])
if err0 != nil && err0 != io.EOF {
t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0)
return
}
_, err1 := lzww.Write(b[:n])
if err1 != nil {
t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1)
return
}
if err0 == io.EOF {
break
}
}
}()
lzwr := NewReader(piper, order, litWidth)
defer lzwr.Close()
// Compare the two.
b0, err0 := io.ReadAll(golden)
b1, err1 := io.ReadAll(lzwr)
if err0 != nil {
t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0)
return
}
if err1 != nil {
t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1)
return
}
if len(b1) != len(b0) {
t.Errorf("%s (order=%d litWidth=%d): length mismatch %d != %d", fn, order, litWidth, len(b1), len(b0))
return
}
for i := 0; i < len(b0); i++ {
if b1[i] != b0[i] {
t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x != 0x%02x\n", fn, order, litWidth, i, b1[i], b0[i])
return
}
}
}
func TestWriter(t *testing.T) {
for _, filename := range filenames {
for _, order := range [...]Order{LSB, MSB} {
// The test data "2.71828 etcetera" is ASCII text requiring at least 6 bits.
for litWidth := 6; litWidth <= 8; litWidth++ {
if filename == "../testdata/gettysburg.txt" && litWidth == 6 {
continue
}
testFile(t, filename, order, litWidth)
}
}
if testing.Short() && testenv.Builder() == "" {
break
}
}
}
func TestWriterReset(t *testing.T) {
for _, order := range [...]Order{LSB, MSB} {
t.Run(fmt.Sprintf("Order %d", order), func(t *testing.T) {
for litWidth := 6; litWidth <= 8; litWidth++ {
t.Run(fmt.Sprintf("LitWidth %d", litWidth), func(t *testing.T) {
var data []byte
if litWidth == 6 {
data = []byte{1, 2, 3}
} else {
data = []byte(`lorem ipsum dolor sit amet`)
}
var buf bytes.Buffer
w := NewWriter(&buf, order, litWidth)
if _, err := w.Write(data); err != nil {
t.Errorf("write: %v: %v", string(data), err)
}
if err := w.Close(); err != nil {
t.Errorf("close: %v", err)
}
b1 := buf.Bytes()
buf.Reset()
w.(*Writer).Reset(&buf, order, litWidth)
if _, err := w.Write(data); err != nil {
t.Errorf("write: %v: %v", string(data), err)
}
if err := w.Close(); err != nil {
t.Errorf("close: %v", err)
}
b2 := buf.Bytes()
if !bytes.Equal(b1, b2) {
t.Errorf("bytes written were not same")
}
})
}
})
}
}
func TestWriterReturnValues(t *testing.T) {
w := NewWriter(io.Discard, LSB, 8)
n, err := w.Write([]byte("asdf"))
if n != 4 || err != nil {
t.Errorf("got %d, %v, want 4, nil", n, err)
}
}
func TestSmallLitWidth(t *testing.T) {
w := NewWriter(io.Discard, LSB, 2)
if _, err := w.Write([]byte{0x03}); err != nil {
t.Fatalf("write a byte < 1<<2: %v", err)
}
if _, err := w.Write([]byte{0x04}); err == nil {
t.Fatal("write a byte >= 1<<2: got nil error, want non-nil")
}
}
func TestStartsWithClearCode(t *testing.T) {
// A literal width of 7 bits means that the code width starts at 8 bits,
// which makes it easier to visually inspect the output (provided that the
// output is short so codes don't get longer). Each byte is a code:
// - ASCII bytes are literal codes,
// - 0x80 is the clear code,
// - 0x81 is the end code,
// - 0x82 and above are copy codes (unused in this test case).
for _, empty := range []bool{false, true} {
var buf bytes.Buffer
w := NewWriter(&buf, LSB, 7)
if !empty {
w.Write([]byte("Hi"))
}
w.Close()
got := buf.String()
want := "\x80\x81"
if !empty {
want = "\x80Hi\x81"
}
if got != want {
t.Errorf("empty=%t: got %q, want %q", empty, got, want)
}
}
}
func BenchmarkEncoder(b *testing.B) {
buf, err := os.ReadFile("../testdata/e.txt")
if err != nil {
b.Fatal(err)
}
if len(buf) == 0 {
b.Fatalf("test file has no data")
}
for e := 4; e <= 6; e++ {
n := int(math.Pow10(e))
buf0 := buf
buf1 := make([]byte, n)
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
copy(buf1[i:], buf0)
}
buf0 = nil
runtime.GC()
b.Run(fmt.Sprint("1e", e), func(b *testing.B) {
b.SetBytes(int64(n))
for i := 0; i < b.N; i++ {
w := NewWriter(io.Discard, LSB, 8)
w.Write(buf1)
w.Close()
}
})
b.Run(fmt.Sprint("1e-Reuse", e), func(b *testing.B) {
b.SetBytes(int64(n))
w := NewWriter(io.Discard, LSB, 8)
for i := 0; i < b.N; i++ {
w.Write(buf1)
w.Close()
w.(*Writer).Reset(io.Discard, LSB, 8)
}
})
}
}

1
src/compress/testdata/e.txt vendored Normal file

File diff suppressed because one or more lines are too long

29
src/compress/testdata/gettysburg.txt vendored Normal file

@@ -0,0 +1,29 @@
Four score and seven years ago our fathers brought forth on
this continent, a new nation, conceived in Liberty, and dedicated
to the proposition that all men are created equal.
Now we are engaged in a great Civil War, testing whether that
nation, or any nation so conceived and so dedicated, can long
endure.
We are met on a great battle-field of that war.
We have come to dedicate a portion of that field, as a final
resting place for those who here gave their lives that that
nation might live. It is altogether fitting and proper that
we should do this.
But, in a larger sense, we can not dedicate - we can not
consecrate - we can not hallow - this ground.
The brave men, living and dead, who struggled here, have
consecrated it, far above our poor power to add or detract.
The world will little note, nor long remember what we say here,
but it can never forget what they did here.
It is for us the living, rather, to be dedicated here to the
unfinished work which they who fought here have thus far so
nobly advanced. It is rather for us to be here dedicated to
the great task remaining before us - that from these honored
dead we take increased devotion to that cause for which they
gave the last full measure of devotion -
that we here highly resolve that these dead shall not have
died in vain - that this nation, under God, shall have a new
birth of freedom - and that government of the people, by the
people, for the people, shall not perish from this earth.
Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania

1
src/compress/testdata/pi.txt vendored Normal file

File diff suppressed because one or more lines are too long


37
src/compress/zlib/example_test.go Normal file

@@ -0,0 +1,37 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package zlib_test
import (
"bytes"
"compress/zlib"
"fmt"
"io"
"os"
)
func ExampleNewWriter() {
var b bytes.Buffer
w := zlib.NewWriter(&b)
w.Write([]byte("hello, world\n"))
w.Close()
fmt.Println(b.Bytes())
// Output: [120 156 202 72 205 201 201 215 81 40 207 47 202 73 225 2 4 0 0 255 255 33 231 4 147]
}
func ExampleNewReader() {
buff := []byte{120, 156, 202, 72, 205, 201, 201, 215, 81, 40, 207,
47, 202, 73, 225, 2, 4, 0, 0, 255, 255, 33, 231, 4, 147}
b := bytes.NewReader(buff)
r, err := zlib.NewReader(b)
if err != nil {
panic(err)
}
io.Copy(os.Stdout, r)
// Output: hello, world
r.Close()
}

181
src/compress/zlib/reader.go Normal file

@@ -0,0 +1,181 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package zlib implements reading and writing of zlib format compressed data,
as specified in RFC 1950.
The implementation provides filters that uncompress during reading
and compress during writing. For example, to write compressed data
to a buffer:
var b bytes.Buffer
w := zlib.NewWriter(&b)
w.Write([]byte("hello, world\n"))
w.Close()
and to read that data back:
r, err := zlib.NewReader(&b)
io.Copy(os.Stdout, r)
r.Close()
*/
package zlib
import (
"bufio"
"compress/flate"
"encoding/binary"
"errors"
"hash"
"hash/adler32"
"io"
)
const (
zlibDeflate = 8
zlibMaxWindow = 7
)
var (
// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
ErrChecksum = errors.New("zlib: invalid checksum")
// ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
ErrDictionary = errors.New("zlib: invalid dictionary")
// ErrHeader is returned when reading ZLIB data that has an invalid header.
ErrHeader = errors.New("zlib: invalid header")
)
type reader struct {
r flate.Reader
decompressor io.ReadCloser
digest hash.Hash32
err error
scratch [4]byte
}
// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it were
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// NewReader creates a new ReadCloser.
// Reads from the returned ReadCloser read and decompress data from r.
// If r does not implement [io.ByteReader], the decompressor may read more
// data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when done.
//
// The [io.ReadCloser] returned by NewReader also implements [Resetter].
func NewReader(r io.Reader) (io.ReadCloser, error) {
return NewReaderDict(r, nil)
}
// NewReaderDict is like [NewReader] but uses a preset dictionary.
// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
//
// The ReadCloser returned by NewReaderDict also implements [Resetter].
func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
z := new(reader)
err := z.Reset(r, dict)
if err != nil {
return nil, err
}
return z, nil
}
func (z *reader) Read(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
n, z.err = z.decompressor.Read(p)
z.digest.Write(p[0:n])
if z.err != io.EOF {
// In the normal case we return here.
return n, z.err
}
// Finished file; check checksum.
if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
z.err = err
return n, z.err
}
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
checksum := binary.BigEndian.Uint32(z.scratch[:4])
if checksum != z.digest.Sum32() {
z.err = ErrChecksum
return n, z.err
}
return n, io.EOF
}
// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader].
// In order for the ZLIB checksum to be verified, the reader must be
// fully consumed until the [io.EOF].
func (z *reader) Close() error {
if z.err != nil && z.err != io.EOF {
return z.err
}
z.err = z.decompressor.Close()
return z.err
}
func (z *reader) Reset(r io.Reader, dict []byte) error {
*z = reader{decompressor: z.decompressor}
if fr, ok := r.(flate.Reader); ok {
z.r = fr
} else {
z.r = bufio.NewReader(r)
}
// Read the header (RFC 1950 section 2.2).
_, z.err = io.ReadFull(z.r, z.scratch[0:2])
if z.err != nil {
if z.err == io.EOF {
z.err = io.ErrUnexpectedEOF
}
return z.err
}
h := binary.BigEndian.Uint16(z.scratch[:2])
if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) {
z.err = ErrHeader
return z.err
}
haveDict := z.scratch[1]&0x20 != 0
if haveDict {
_, z.err = io.ReadFull(z.r, z.scratch[0:4])
if z.err != nil {
if z.err == io.EOF {
z.err = io.ErrUnexpectedEOF
}
return z.err
}
checksum := binary.BigEndian.Uint32(z.scratch[:4])
if checksum != adler32.Checksum(dict) {
z.err = ErrDictionary
return z.err
}
}
if z.decompressor == nil {
if haveDict {
z.decompressor = flate.NewReaderDict(z.r, dict)
} else {
z.decompressor = flate.NewReader(z.r)
}
} else {
z.decompressor.(flate.Resetter).Reset(z.r, dict)
}
z.digest = adler32.New()
return nil
}
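
Because the returned ReadCloser implements Resetter, a single reader can decode several streams in turn without reallocating. A hedged sketch (the input is the 8-byte empty stream that also appears in the tests below; any valid stream would do):

package main

import (
	"bytes"
	"compress/zlib"
	"io"
	"log"
	"os"
)

func main() {
	// An empty zlib stream.
	empty := []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}
	r, err := zlib.NewReader(bytes.NewReader(empty))
	if err != nil {
		log.Fatal(err)
	}
	if _, err := io.Copy(os.Stdout, r); err != nil {
		log.Fatal(err)
	}
	// Reuse the same reader for a second stream instead of allocating.
	if err := r.(zlib.Resetter).Reset(bytes.NewReader(empty), nil); err != nil {
		log.Fatal(err)
	}
	if _, err := io.Copy(os.Stdout, r); err != nil {
		log.Fatal(err)
	}
	if err := r.Close(); err != nil {
		log.Fatal(err)
	}
}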


186
src/compress/zlib/reader_test.go Normal file

@@ -0,0 +1,186 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package zlib
import (
"bytes"
"io"
"testing"
)
type zlibTest struct {
desc string
raw string
compressed []byte
dict []byte
err error
}
// Compare-to-golden test data was generated by the ZLIB example program at
// https://www.zlib.net/zpipe.c
var zlibTests = []zlibTest{
{
"truncated empty",
"",
[]byte{},
nil,
io.ErrUnexpectedEOF,
},
{
"truncated dict",
"",
[]byte{0x78, 0xbb},
[]byte{0x00},
io.ErrUnexpectedEOF,
},
{
"truncated checksum",
"",
[]byte{0x78, 0xbb, 0x00, 0x01, 0x00, 0x01, 0xca, 0x48,
0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x28, 0xcf, 0x2f,
0xca, 0x49, 0x01, 0x04, 0x00, 0x00, 0xff, 0xff,
},
[]byte{0x00},
io.ErrUnexpectedEOF,
},
{
"empty",
"",
[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
nil,
nil,
},
{
"goodbye",
"goodbye, world",
[]byte{
0x78, 0x9c, 0x4b, 0xcf, 0xcf, 0x4f, 0x49, 0xaa,
0x4c, 0xd5, 0x51, 0x28, 0xcf, 0x2f, 0xca, 0x49,
0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e,
},
nil,
nil,
},
{
"bad header (CINFO)",
"",
[]byte{0x88, 0x98, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
nil,
ErrHeader,
},
{
"bad header (FCHECK)",
"",
[]byte{0x78, 0x9f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
nil,
ErrHeader,
},
{
"bad checksum",
"",
[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff},
nil,
ErrChecksum,
},
{
"not enough data",
"",
[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00},
nil,
io.ErrUnexpectedEOF,
},
{
"excess data is silently ignored",
"",
[]byte{
0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
0x78, 0x9c, 0xff,
},
nil,
nil,
},
{
"dictionary",
"Hello, World!\n",
[]byte{
0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00,
0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24,
0x12, 0x04, 0x74,
},
[]byte{
0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x0a,
},
nil,
},
{
"wrong dictionary",
"",
[]byte{
0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00,
0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24,
0x12, 0x04, 0x74,
},
[]byte{
0x48, 0x65, 0x6c, 0x6c,
},
ErrDictionary,
},
{
"truncated zlib stream amid raw-block",
"hello",
[]byte{
0x78, 0x9c, 0x00, 0x0c, 0x00, 0xf3, 0xff, 0x68, 0x65, 0x6c, 0x6c, 0x6f,
},
nil,
io.ErrUnexpectedEOF,
},
{
"truncated zlib stream amid fixed-block",
"He",
[]byte{
0x78, 0x9c, 0xf2, 0x48, 0xcd,
},
nil,
io.ErrUnexpectedEOF,
},
}
func TestDecompressor(t *testing.T) {
b := new(bytes.Buffer)
for _, tt := range zlibTests {
in := bytes.NewReader(tt.compressed)
zr, err := NewReaderDict(in, tt.dict)
if err != nil {
if err != tt.err {
t.Errorf("%s: NewReader: %s", tt.desc, err)
}
continue
}
defer zr.Close()
// Read and verify correctness of data.
b.Reset()
n, err := io.Copy(b, zr)
if err != nil {
if err != tt.err {
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
}
continue
}
s := b.String()
if s != tt.raw {
t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
}
// Check for sticky errors.
if n, err := zr.Read([]byte{0}); n != 0 || err != io.EOF {
t.Errorf("%s: Read() = (%d, %v), want (0, io.EOF)", tt.desc, n, err)
}
if err := zr.Close(); err != nil {
t.Errorf("%s: Close() = %v, want nil", tt.desc, err)
}
}
}

193
src/compress/zlib/writer.go Normal file

@@ -0,0 +1,193 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package zlib
import (
"compress/flate"
"encoding/binary"
"fmt"
"hash"
"hash/adler32"
"io"
)
// These constants are copied from the flate package, so that code that imports
// "compress/zlib" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
HuffmanOnly = flate.HuffmanOnly
)
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
w io.Writer
level int
dict []byte
compressor *flate.Writer
digest hash.Hash32
err error
scratch [4]byte
wroteHeader bool
}
// NewWriter creates a new Writer.
// Writes to the returned Writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the Writer when done.
// Writes may be buffered and not flushed until Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevelDict(w, DefaultCompression, nil)
return z
}
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, HuffmanOnly
// or any integer value between BestSpeed and BestCompression inclusive.
// The error returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
return NewWriterLevelDict(w, level, nil)
}
// NewWriterLevelDict is like NewWriterLevel but specifies a dictionary to
// compress with.
//
// The dictionary may be nil. If not, its contents should not be modified until
// the Writer is closed.
func NewWriterLevelDict(w io.Writer, level int, dict []byte) (*Writer, error) {
if level < HuffmanOnly || level > BestCompression {
return nil, fmt.Errorf("zlib: invalid compression level: %d", level)
}
return &Writer{
w: w,
level: level,
dict: dict,
}, nil
}
// Reset clears the state of the Writer z such that it is equivalent to its
// initial state from NewWriterLevel or NewWriterLevelDict, but instead writing
// to w.
func (z *Writer) Reset(w io.Writer) {
z.w = w
// z.level and z.dict left unchanged.
if z.compressor != nil {
z.compressor.Reset(w)
}
if z.digest != nil {
z.digest.Reset()
}
z.err = nil
z.scratch = [4]byte{}
z.wroteHeader = false
}
// writeHeader writes the ZLIB header.
func (z *Writer) writeHeader() (err error) {
z.wroteHeader = true
// ZLIB has a two-byte header (as documented in RFC 1950).
// The first four bits are the CINFO (compression info), which is 7 for the default 32 KiB deflate window size.
// The next four bits are the CM (compression method), which is 8 for deflate.
z.scratch[0] = 0x78
// The next two bits are the FLEVEL (compression level). The four values are:
// 0=fastest, 1=fast, 2=default, 3=best.
// The next bit, FDICT, is set if a dictionary is given.
// The final five FCHECK bits form a mod-31 checksum.
switch z.level {
case -2, 0, 1:
z.scratch[1] = 0 << 6
case 2, 3, 4, 5:
z.scratch[1] = 1 << 6
case 6, -1:
z.scratch[1] = 2 << 6
case 7, 8, 9:
z.scratch[1] = 3 << 6
default:
panic("unreachable")
}
if z.dict != nil {
z.scratch[1] |= 1 << 5
}
z.scratch[1] += uint8(31 - binary.BigEndian.Uint16(z.scratch[:2])%31)
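// For example (numbers are an added illustration, not from the original
// comment): with DefaultCompression and no dictionary, scratch is
// 0x78 0x80 at this point; 0x7880 % 31 == 3, so 28 is added and the header
// becomes the familiar 0x78 0x9c seen in the reader tests above.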
if _, err = z.w.Write(z.scratch[0:2]); err != nil {
return err
}
if z.dict != nil {
// The next four bytes are the Adler-32 checksum of the dictionary.
binary.BigEndian.PutUint32(z.scratch[:], adler32.Checksum(z.dict))
if _, err = z.w.Write(z.scratch[0:4]); err != nil {
return err
}
}
if z.compressor == nil {
// Initialize deflater unless the Writer is being reused
// after a Reset call.
z.compressor, err = flate.NewWriterDict(z.w, z.level, z.dict)
if err != nil {
return err
}
z.digest = adler32.New()
}
return nil
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed until the Writer is closed or
// explicitly flushed.
func (z *Writer) Write(p []byte) (n int, err error) {
if !z.wroteHeader {
z.err = z.writeHeader()
}
if z.err != nil {
return 0, z.err
}
if len(p) == 0 {
return 0, nil
}
n, err = z.compressor.Write(p)
if err != nil {
z.err = err
return
}
z.digest.Write(p)
return
}
// Flush flushes the Writer to its underlying io.Writer.
func (z *Writer) Flush() error {
if !z.wroteHeader {
z.err = z.writeHeader()
}
if z.err != nil {
return z.err
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if !z.wroteHeader {
z.err = z.writeHeader()
}
if z.err != nil {
return z.err
}
z.err = z.compressor.Close()
if z.err != nil {
return z.err
}
checksum := z.digest.Sum32()
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
binary.BigEndian.PutUint32(z.scratch[:], checksum)
_, z.err = z.w.Write(z.scratch[0:4])
return z.err
}
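
A preset dictionary mainly helps short payloads whose content resembles the dictionary. A sketch of the dictionary round trip (the dictionary and payload strings are just illustrations); the same dict must be supplied to NewReaderDict on the way back:

package main

import (
	"bytes"
	"compress/zlib"
	"fmt"
	"io"
	"log"
)

func main() {
	dict := []byte("hello, world\n") // must match on the read side
	var buf bytes.Buffer
	w, err := zlib.NewWriterLevelDict(&buf, zlib.BestCompression, dict)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := w.Write([]byte("hello, world\nhello, world\n")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	r, err := zlib.NewReaderDict(&buf, dict)
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()
	out, err := io.ReadAll(r)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Print(string(out))
}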

Some files were not shown because too many files have changed in this diff.