-
Notifications
You must be signed in to change notification settings - Fork 3
/
detecreaddatatype.go
97 lines (84 loc) · 2.24 KB
/
detecreaddatatype.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package genomisc
import (
"compress/bzip2"
"compress/gzip"
"compress/zlib"
"io"
"github.com/krolaw/zipstream"
"github.com/xi2/xz"
)
// DataType identifies the container or compression format of a byte stream.
type DataType byte

// Known data types. DataTypeInvalid is the zero value and is only returned
// together with a non-nil error.
const (
	DataTypeInvalid DataType = iota
	DataTypeNoCompression
	DataTypeGzip
	DataTypeZip
	DataTypeXZ
	DataTypeZ
	DataTypeBZip2
)

// byteCodeSigs maps each recognizable data type to the byte sequence expected
// at the very start of a stream of that type.
var byteCodeSigs = map[DataType][]byte{
	DataTypeGzip:  {0x1f, 0x8b, 0x08},
	DataTypeZip:   {0x50, 0x4b, 0x03, 0x04},
	DataTypeXZ:    {0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00},
	DataTypeZ:     {0x1f, 0x9d},
	DataTypeBZip2: {0x42, 0x5a, 0x68},
}

// DetectDataType attempts to detect the data type of a stream by comparing its
// leading bytes against a set of known signatures. Byte code signatures from
// https://stackoverflow.com/a/19127748/199475
//
// It consumes up to len(longest signature) bytes from r and does not rewind;
// callers that need the full stream afterwards must reset the reader
// themselves.
//
// It returns DataTypeNoCompression when no signature matches, including when
// the stream is shorter than every signature. An empty stream yields
// (DataTypeInvalid, io.EOF); any other read failure is returned unchanged with
// DataTypeInvalid.
func DetectDataType(r io.Reader) (DataType, error) {
	// Probe exactly as many bytes as the longest known signature requires, so
	// adding a longer signature to the table can never index out of range.
	probeLen := 0
	for _, sig := range byteCodeSigs {
		if len(sig) > probeLen {
			probeLen = len(sig)
		}
	}

	// A single Read may legally return fewer bytes than requested even when
	// more are available, so keep reading until the probe is full or the
	// stream ends.
	buff := make([]byte, probeLen)
	n, err := io.ReadFull(r, buff)
	if err != nil && err != io.ErrUnexpectedEOF {
		return DataTypeInvalid, err
	}

	// Only the bytes actually read take part in matching; the zero-filled tail
	// of buff must not be mistaken for data (the XZ signature ends in 0x00).
	head := buff[:n]

	// Map iteration order is unspecified, but no signature is a prefix of
	// another, so at most one entry can match.
	for dt, sig := range byteCodeSigs {
		if len(sig) <= len(head) && string(head[:len(sig)]) == string(sig) {
			return dt, nil
		}
	}

	return DataTypeNoCompression, nil
}
// MaybeDecompressReadCloserFromFile detects whether a file-like object (must
// implement io.Reader, io.Seeker, and io.Closer) is compressed with GZip, Zip,
// BZip2, XZ, or Z and, if so, returns a reader that yields the decompressed
// contents. Otherwise it returns f itself.
//
// Detection consumes the first few bytes of f, so f is rewound to its start
// before it is handed to a decompressor or returned.
//
// Closing the returned reader always closes f as well, whether or not a
// decompressor was layered on top. When an error is returned, f is left open
// and remains the caller's responsibility.
func MaybeDecompressReadCloserFromFile(f io.ReadSeekCloser) (io.ReadCloser, error) {
	dt, err := DetectDataType(f)
	if err != nil {
		return nil, err
	}

	// Rewind so that the signature bytes consumed by detection are not lost.
	// A failed seek would leave the decompressor starting mid-header.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	switch dt {
	case DataTypeGzip:
		gz, err := gzip.NewReader(f)
		if err != nil {
			return nil, err
		}
		return &fileBackedReadCloser{ReadCloser: gz, file: f}, nil

	case DataTypeZip:
		return &fileBackedReadCloser{
			ReadCloser: &readCloserFaker{zipstream.NewReader(f)},
			file:       f,
		}, nil

	case DataTypeBZip2:
		return &fileBackedReadCloser{
			ReadCloser: &readCloserFaker{bzip2.NewReader(f)},
			file:       f,
		}, nil

	case DataTypeXZ:
		reader, err := xz.NewReader(f, 0)
		if err != nil {
			return nil, err
		}
		return &fileBackedReadCloser{
			ReadCloser: &readCloserFaker{reader},
			file:       f,
		}, nil

	case DataTypeZ:
		// NOTE(review): the DataTypeZ signature (0x1f 0x9d) does not look like
		// a zlib header, whose first byte must carry compression method 8 in
		// its low nibble, so zlib.NewReader is expected to reject such
		// streams with zlib.ErrHeader. Confirm whether a different decoder was
		// intended; behavior is kept unchanged here.
		zr, err := zlib.NewReader(f)
		if err != nil {
			return nil, err
		}
		return &fileBackedReadCloser{ReadCloser: zr, file: f}, nil
	}

	// No data type detected. For now, we assume this is uncompressed.
	return f, nil
}

// fileBackedReadCloser couples a decompressing reader with the file it reads
// from so that a single Close releases both.
type fileBackedReadCloser struct {
	// ReadCloser is the decompressor; Read is served from it.
	io.ReadCloser
	// file is the underlying source, closed after the decompressor.
	file io.Closer
}

// Close closes the decompressor and then the underlying file. Both are always
// attempted; the first error encountered is returned.
func (r *fileBackedReadCloser) Close() error {
	err := r.ReadCloser.Close()
	if cerr := r.file.Close(); err == nil {
		err = cerr
	}
	return err
}
// readCloserFaker adapts a plain io.Reader to the io.ReadCloser interface for
// readers that have nothing to release: reads pass straight through to the
// embedded reader and Close is a no-op.
type readCloserFaker struct {
	io.Reader
}

// Close does nothing and always reports success.
func (*readCloserFaker) Close() error { return nil }