From f29164b18793e4aeba89eacc0ed64a8ec7d62cfe Mon Sep 17 00:00:00 2001 From: templexxx Date: Wed, 6 Sep 2017 13:32:58 +0800 Subject: [PATCH 1/4] change way to get cpu ext instruction --- rs_amd64.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rs_amd64.go b/rs_amd64.go index b3a486f..7a9fa98 100755 --- a/rs_amd64.go +++ b/rs_amd64.go @@ -3,6 +3,7 @@ package reedsolomon import ( "errors" "sync" + "github.com/klauspost/cpuid" ) // SIMD Instruction Extensions @@ -13,9 +14,9 @@ const ( ) func getEXT() int { - if hasAVX2() { + if cpuid.CPU.AVX2() { return avx2 - } else if hasSSSE3() { + } else if cpuid.CPU.SSSE3() { return ssse3 } else { return none From 7d973f39fd0c9212f43187448f229c294e142a20 Mon Sep 17 00:00:00 2001 From: templexxx Date: Wed, 6 Sep 2017 20:04:09 +0800 Subject: [PATCH 2/4] File mode Executable to Regular --- .gitignore | 0 LICENSE | 0 README.md | 0 matrix.go | 0 matrix_test.go | 0 rs.go | 0 rs_amd64.go | 0 rs_amd64.s | 0 rs_other.go | 0 rs_test.go | 0 tbl.go | 0 11 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 .gitignore mode change 100755 => 100644 LICENSE mode change 100755 => 100644 README.md mode change 100755 => 100644 matrix.go mode change 100755 => 100644 matrix_test.go mode change 100755 => 100644 rs.go mode change 100755 => 100644 rs_amd64.go mode change 100755 => 100644 rs_amd64.s mode change 100755 => 100644 rs_other.go mode change 100755 => 100644 rs_test.go mode change 100755 => 100644 tbl.go diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 diff --git a/LICENSE b/LICENSE old mode 100755 new mode 100644 diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/matrix.go b/matrix.go old mode 100755 new mode 100644 diff --git a/matrix_test.go b/matrix_test.go old mode 100755 new mode 100644 diff --git a/rs.go b/rs.go old mode 100755 new mode 100644 diff --git a/rs_amd64.go b/rs_amd64.go old mode 100755 new mode 100644 diff --git a/rs_amd64.s b/rs_amd64.s old mode 100755 new mode 100644 diff --git a/rs_other.go b/rs_other.go old mode 100755 new mode 100644 diff --git a/rs_test.go b/rs_test.go old mode 100755 new mode 100644 diff --git a/tbl.go b/tbl.go old mode 100755 new mode 100644 From 3b5a60ac8a85400651f725ddedc132325787e911 Mon Sep 17 00:00:00 2001 From: templexxx Date: Wed, 6 Sep 2017 20:24:32 +0800 Subject: [PATCH 3/4] File mode Executable to Regular --- mathtool/ccmbination.jpg | Bin mathtool/cntinverse.go | 0 mathtool/gentbls.go | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 mathtool/ccmbination.jpg mode change 100755 => 100644 mathtool/cntinverse.go mode change 100755 => 100644 mathtool/gentbls.go diff --git a/mathtool/ccmbination.jpg b/mathtool/ccmbination.jpg old mode 100755 new mode 100644 diff --git a/mathtool/cntinverse.go b/mathtool/cntinverse.go old mode 100755 new mode 100644 diff --git a/mathtool/gentbls.go b/mathtool/gentbls.go old mode 100755 new mode 100644 From 979895f353956b192c1bce48347c626d9328ec20 Mon Sep 17 00:00:00 2001 From: templexxx Date: Tue, 26 Sep 2017 09:56:37 +0800 Subject: [PATCH 4/4] use golang internal/cpu to detect cpu feature --- rs_amd64.go | 35 +++++++++++++++++++---------------- rs_amd64.s | 19 ------------------- 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/rs_amd64.go b/rs_amd64.go index 7a9fa98..75301d4 100644 --- a/rs_amd64.go +++ b/rs_amd64.go @@ -3,7 +3,8 @@ package reedsolomon import ( "errors" "sync" - "github.com/klauspost/cpuid" + + "github.com/templexxx/cpufeat" ) // SIMD Instruction Extensions @@ -13,22 +14,25 @@ const ( ssse3 ) -func getEXT() int { - if cpuid.CPU.AVX2() { - return avx2 - } else if cpuid.CPU.SSSE3() { - return ssse3 +var extension = none + +func init() { + getEXT() +} + +func getEXT() { + if cpufeat.X86.HasAVX2 { + extension = avx2 + return + } else if cpufeat.X86.HasSSSE3 { + extension = ssse3 + return } else { - return none + extension = none + return } } -//go:noescape -func hasAVX2() bool - -//go:noescape -func hasSSSE3() bool - //go:noescape func copy32B(dst, src []byte) // Need SSE2(introduced in 2001) @@ -76,14 +80,13 @@ type ( func newRS(d, p int, em matrix) (enc Encoder) { g := em[d*d:] - ext := getEXT() - if ext == none { + if extension == none { return &encBase{data: d, parity: p, encode: em, gen: g} } t := make([]byte, d*p*32) initTbl(g, p, d, t) ok := okCache(d, p) - if ext == avx2 { + if extension == avx2 { e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok, inverseCache: iCache{data: make(map[uint32][]byte)}} return e diff --git a/rs_amd64.s b/rs_amd64.s index 7d7e7a1..b8f4885 100644 --- a/rs_amd64.s +++ b/rs_amd64.s @@ -323,16 +323,6 @@ one16b: JNE ymm RET -TEXT ·hasAVX2(SB), NOSPLIT, $0 - XORQ AX, AX - XORQ CX, CX - ADDL $7, AX - CPUID - SHRQ $5, BX - ANDQ $1, BX - MOVB BX, ret+0(FP) - RET - // func mulVectSSSE3(tbl, d, p []byte) TEXT ·mulVectSSSE3(SB), NOSPLIT, $0 MOVQ i+24(FP), in @@ -399,15 +389,6 @@ loop: JNZ loop RET -TEXT ·hasSSSE3(SB), NOSPLIT, $0 - XORQ AX, AX - INCL AX - CPUID - SHRQ $9, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - // func copy32B(dst, src []byte) TEXT ·copy32B(SB), NOSPLIT, $0 MOVQ dst+0(FP), SI