diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
diff --git a/rs_amd64.go b/rs_amd64.go
index b3a486f..75301d4 100644
--- a/rs_amd64.go
+++ b/rs_amd64.go
@@ -3,6 +3,8 @@ package reedsolomon
 import (
 	"errors"
 	"sync"
+
+	"github.com/templexxx/cpufeat"
 )
 
 // SIMD Instruction Extensions
@@ -12,22 +14,28 @@ const (
 	ssse3
 )
 
-func getEXT() int {
-	if hasAVX2() {
-		return avx2
-	} else if hasSSSE3() {
-		return ssse3
-	} else {
-		return none
-	}
-}
+// extension holds the SIMD extension that newRS uses to choose an encoder.
+// It is assigned by getEXT, which init calls once at package start-up.
+var extension = none
+
+// init detects the CPU's SIMD support before any encoder is built.
+func init() {
+	getEXT()
+}
+
+// getEXT stores in extension the best SIMD extension that cpufeat reports
+// for this CPU: avx2 if available, otherwise ssse3, otherwise none.
+func getEXT() {
+	switch {
+	case cpufeat.X86.HasAVX2:
+		extension = avx2
+	case cpufeat.X86.HasSSSE3:
+		extension = ssse3
+	default:
+		extension = none
+	}
+}
 
-//go:noescape
-func hasAVX2() bool
-
-//go:noescape
-func hasSSSE3() bool
-
 //go:noescape
 func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
 
@@ -75,14 +83,13 @@ type (
 
 func newRS(d, p int, em matrix) (enc Encoder) {
 	g := em[d*d:]
-	ext := getEXT()
-	if ext == none {
+	if extension == none {
 		return &encBase{data: d, parity: p, encode: em, gen: g}
 	}
 	t := make([]byte, d*p*32)
 	initTbl(g, p, d, t)
 	ok := okCache(d, p)
-	if ext == avx2 {
+	if extension == avx2 {
 		e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
 			inverseCache: iCache{data: make(map[uint32][]byte)}}
 		return e
diff --git a/rs_amd64.s b/rs_amd64.s
index 7d7e7a1..b8f4885 100644
--- a/rs_amd64.s
+++ b/rs_amd64.s
@@ -323,16 +323,6 @@ one16b:
 	JNE ymm
 	RET
 
-TEXT ·hasAVX2(SB), NOSPLIT, $0
-	XORQ AX, AX
-	XORQ CX, CX
-	ADDL $7, AX
-	CPUID
-	SHRQ $5, BX
-	ANDQ $1, BX
-	MOVB BX, ret+0(FP)
-	RET
-
 // func mulVectSSSE3(tbl, d, p []byte)
 TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
 	MOVQ i+24(FP), in
@@ -399,15 +389,6 @@ loop:
 	JNZ loop
 	RET
 
-TEXT ·hasSSSE3(SB), NOSPLIT, $0
-	XORQ AX, AX
-	INCL AX
-	CPUID
-	SHRQ $9, CX
-	ANDQ $1, CX
-	MOVB CX, ret+0(FP)
-	RET
-
 // func copy32B(dst, src []byte)
 TEXT ·copy32B(SB), NOSPLIT, $0
 	MOVQ dst+0(FP), SI