From 17c05fcf7b14b987feb6fe0d76928ab9d2d4ad11 Mon Sep 17 00:00:00 2001 From: ehg54 Date: Tue, 28 May 2024 11:38:21 -0400 Subject: [PATCH] cleanup --- apps/blas/.DS_Store | Bin 6148 -> 0 bytes apps/blas/syr/syr-1-1-gen.fil | 112 ------------------------------ apps/blas/syr/syr-1-1.fil | 111 ----------------------------- apps/blas/syr/syr-comb.fil | 59 ---------------- apps/blas/syr/syr-gen.fil | 127 ---------------------------------- 5 files changed, 409 deletions(-) delete mode 100644 apps/blas/.DS_Store delete mode 100644 apps/blas/syr/syr-1-1-gen.fil delete mode 100644 apps/blas/syr/syr-1-1.fil delete mode 100644 apps/blas/syr/syr-comb.fil delete mode 100644 apps/blas/syr/syr-gen.fil diff --git a/apps/blas/.DS_Store b/apps/blas/.DS_Store deleted file mode 100644 index 58d3333320aa2a98ed7acfb3a1b5987f2bf64ad4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~JqiLr422WdLa^D=avBfd4F=H@cmYwd5(LGWCl0N(!#(Lc_(*y%I$o*9}m~-#Y%3Iq!r-34EA%IpaN8Y3Qz$mKm|sm zKpy0?`G}s0k3t2gz&I4J??ZtbYqACX(}Cb4060b24Qrnzz+wfkCR-2{m_{oYt?FZl z)x8}od0kDmV6=iewPy1_xe9O3OJ0Er#N3%DEhi!H#KY(Z3D{1I>&7^uKc6?g&C C3lWR} diff --git a/apps/blas/syr/syr-1-1-gen.fil b/apps/blas/syr/syr-1-1-gen.fil deleted file mode 100644 index 681db332..00000000 --- a/apps/blas/syr/syr-1-1-gen.fil +++ /dev/null @@ -1,112 +0,0 @@ -import "apps/blas/util.fil"; - -// Computes alpha * x * x^T + A in one cycle, where: -// alpha is a scalar -// x is a vector of length N -// A is a symmetric matrix -// W: width of nums -// N: length of vector -comp Syr_1_1[W, N]<'G:II>( - go: interface['G], - alpha: ['G, 'G+1] W, - x[N]: ['G, 'G+1] W, - A[N][N]: ['G, 'G+1] W -) -> ( - A_out[N][N]: ['G+L, 'G+L+1] W -) with { - some L where L > 0; - some II where II > 0; -} where W > 0, - N > 0 -{ - assume Mults::L > mult_uses*Mults::II; - - let mult_uses = N / 1; - let add_uses = N / 1; - // in ['G, 'G+7]; - Mults := new Multipliers[W, 1] in ['G, 'G + Mults::L + (mult_uses*mult_uses)]; - Adds := new Adders[W, 1] in ['G + Mults::L + Mults::L, 'G + Mults::L + Mults::L + N*N]; - - bundle alpha_x[mult_uses][1]: for ['G+k+Mults::L, 'G+k+Mults::L+1] W; - - // alpha * x stage - for i in 0..mult_uses { - let mult_start = i; - let mult_end = i + Mults::L; - - alpha_reg := new Shift[W, mult_start]<'G>(alpha); - bundle alpha_bundle[1]: for ['G+mult_start, 'G+mult_start+1] W; - - if i == 0 { - for j in 0..1 { - alpha_bundle{j} = alpha; - } - m := Mults<'G+mult_start>(x{i..(i+1)}, alpha_bundle{i..(i+1)}); - alpha_x{i}{0..1} = m.out{0..1}; - } else { - alpha_reg := new Shift[W, mult_start]<'G>(alpha); - for j in 0..1 { - alpha_bundle{j} = alpha_reg.out; - } - x_reg := new Shift[W, mult_start, 1]<'G>(x{i..(i+1)}); - m := Mults<'G+mult_start>(x_reg.out{0..1}, alpha_reg.out{0..1}); - alpha_x{i}{0..1} = m.out{0..1}; - } - } - - // (alpha*x) * x^T stage - bundle vec_mult[N*N][1]: for ['G+k+Mults::L + Mults::L, 'G+k+Mults::L +Mults::L+1] W; - - // each iteration corresponds to computing a row in resulting vector - for i in 0..N { - let second_mult_start = i+Mults::L; // latency is 3, so first packet ready after 3 cycles - // for each mult use, - for j in 0..mult_uses { - let offset = i*(mult_uses-1); - x_reg := new Shift[W, i*N+j+Mults::L, 1]<'G>(x{j}); - alpha_x_reg := new Shift[W, i*N+j+Mults::L-second_mult_start, 1]<'G+second_mult_start>(alpha_x{i}{0..1}); - - mult := Mults<'G+i*N+j+Mults::L>(alpha_x_reg.out{0..1}, x_reg.out{0..1}); - vec_mult{i*N+j}{0..1} = mult.out{0..1}; - } - } - - // N = 2 - // MultsN = 1 - // mult_uses = 2 - // ax0 happens at G+0, done at G+3 - // ax1 happens at G+1, done at G+4 - // ax0 * x0 happens at G+3, done at G+6 - // ax0 * x1 happens at G+4, done at G+7 - // ax1 * x0 happens at G+5, done at G+8 - // ax1 * x1 happens at G+6, done at G+9 - - bundle matrix_add[N*N][1]: for ['G+k+Mults::L+Mults::L, 'G+k+Mults::L+Mults::L+1] W; - - // add stage - for i in 0..N { - for j in 0..add_uses { - a_reg := new Shift[W, i*N+j+Mults::L + Mults::L, 1]<'G>(A{i}{j}); - add := Adds<'G+i*N+j+Mults::L + Mults::L>(vec_mult{i*N+j}{0..1}, a_reg.out{0..1}); - matrix_add{i*N+j}{0..1} = add.out{0..1}; - } - } - - // i j i*N+j - // 0 0 0 - // 0 1 1 - // 1 0 2 - // 1 1 3 - - // synchronize everything - let latency = Mults::L + Mults::L + N*N-1; - for i in 0..N { - for j in 0..N { - reg := new Shift[W, latency - (i*N+j+Mults::L + Mults::L), 1]<'G+i*N+j+Mults::L + Mults::L>(matrix_add{i*N+j}{0..1}); - A_out{i}{j} = reg.out{0..1}; - } - } - - L := latency; - II := Mults::L + (mult_uses*mult_uses); -} \ No newline at end of file diff --git a/apps/blas/syr/syr-1-1.fil b/apps/blas/syr/syr-1-1.fil deleted file mode 100644 index 520cffeb..00000000 --- a/apps/blas/syr/syr-1-1.fil +++ /dev/null @@ -1,111 +0,0 @@ -import "apps/blas/util.fil"; - -// Computes alpha * x * x^T + A in one cycle, where: -// alpha is a scalar -// x is a vector of length N -// A is a symmetric matrix -// W: width of nums -// N: length of vector -comp Syr_1_1[W, N]<'G:II>( - go: interface['G], - alpha: ['G, 'G+1] W, - x[N]: ['G, 'G+1] W, - A[N][N]: ['G, 'G+1] W -) -> ( - A_out[N][N]: ['G+L, 'G+L+1] W -) with { - some L where L > 0; - some II where II > 0; -} where W > 0, - N > 0 -{ - assume Mults::L == 3; - assume N == 2; - - let mult_uses = N / 1; - let add_uses = N / 1; - Mults := new Multipliers[W, 1] in ['G, 'G + 7]; - Adds := new Adders[W, 1] in ['G + 6, 'G + 10]; - - bundle alpha_x[mult_uses][1]: for ['G+k+Mults::L, 'G+k+Mults::L+1] W; - - // alpha * x stage - for i in 0..mult_uses { - let mult_start = i; - let mult_end = i + 3; - - alpha_reg := new Shift[W, mult_start]<'G>(alpha); - bundle alpha_bundle[1]: for ['G+mult_start, 'G+mult_start+1] W; - - if i == 0 { - for j in 0..1 { - alpha_bundle{j} = alpha; - } - m := Mults<'G+mult_start>(x{i..(i+1)}, alpha_bundle{i..(i+1)}); - alpha_x{i}{0..1} = m.out{0..1}; - } else { - alpha_reg := new Shift[W, mult_start]<'G>(alpha); - for j in 0..1 { - alpha_bundle{j} = alpha_reg.out; - } - x_reg := new Shift[W, mult_start, 1]<'G>(x{i..(i+1)}); - m := Mults<'G+mult_start>(x_reg.out{0..1}, alpha_reg.out{0..1}); - alpha_x{i}{0..1} = m.out{0..1}; - } - } - - // (alpha*x) * x^T stage - bundle vec_mult[N*N][1]: for ['G+k+6, 'G+k+7] W; - - // each iteration corresponds to computing a row in resulting vector - for i in 0..N { - let second_mult_start = i+3; // latency is 3, so first packet ready after 3 cycles - // for each mult use, - for j in 0..mult_uses { - let offset = i*(mult_uses-1); - x_reg := new Shift[W, i*N+j+3, 1]<'G>(x{j}); - alpha_x_reg := new Shift[W, i*N+j+3-(i+3), 1]<'G+i+3>(alpha_x{i}{0..1}); - - mult := Mults<'G+i*N+j+3>(alpha_x_reg.out{0..1}, x_reg.out{0..1}); - vec_mult{i*N+j}{0..1} = mult.out{0..1}; - } - } - - // ax0 happens at G+0, done at G+3 - // ax1 happens at G+1, done at G+4 - // ax0 * x0 happens at G+3, done at G+6 - // ax0 * x1 happens at G+4, done at G+7 - // ax1 * x0 happens at G+5, done at G+8 - // ax1 * x1 happens at G+6, done at G+9 - - bundle matrix_add[N*N][1]: for ['G+k+Mults::L+(mult_uses*mult_uses)-1, 'G+k+Mults::L+(mult_uses*mult_uses)] W; - - // add stage - for i in 0..N { - for j in 0..add_uses { - a_reg := new Shift[W, i*N+j+6, 1]<'G>(A{i}{j}); - add := Adds<'G+i*N+j+6>(vec_mult{i*N+j}{0..1}, a_reg.out{0..1}); - matrix_add{i*N+j}{0..1} = add.out{0..1}; - } - } - - // i j i*N+j - // 0 0 0 - // 0 1 1 - // 1 0 2 - // 1 1 3 - - // synchronize everything - - // start of last mult in first stage, then 3 cycles to finish, then 3 cycles to finish - let latency = Mults::L + (mult_uses*mult_uses)-1 + Mults::L; - for i in 0..N { - for j in 0..N { - reg := new Shift[W, latency - (i*N+j+6), 1]<'G+i*N+j+6>(matrix_add{i*N+j}{0..1}); - A_out{i}{j} = reg.out{0..1}; - } - } - - L := latency; - II := 7; -} \ No newline at end of file diff --git a/apps/blas/syr/syr-comb.fil b/apps/blas/syr/syr-comb.fil deleted file mode 100644 index a0581e9d..00000000 --- a/apps/blas/syr/syr-comb.fil +++ /dev/null @@ -1,59 +0,0 @@ -import "apps/blas/util.fil"; - -// Computes alpha * x * x^T + A in one cycle, where: -// alpha is a scalar -// x is a vector of length N -// A is a symmetric matrix -// W: width of nums -// N: length of vector -comp SyrComb[W, N]<'G:1>( - alpha: ['G, 'G+1] W, - x[N]: ['G, 'G+1] W, - A[N][N]: ['G, 'G+1] W -) -> ( - A_out[N][N]: ['G, 'G+1] W -) with { - some L; - some II; -} where W > 0 { - II := 1; - L := 0; - - bundle alpha_bundle[N]: ['G, 'G+1] W; - bundle alpha_x[N]: ['G, 'G+1] W; - - bundle vec_mult[N][N]: ['G, 'G+1] W; - - for i in 0..N { - alpha_bundle{i} = alpha; - } - - Mults1 := new MultipliersComb[W, N]<'G>(alpha_bundle{0..N}, x{0..N}); - alpha_x{0..N} = Mults1.out{0..N}; - - for i in 0..N { - bundle left_bundle[N]: ['G, 'G+1] W; - bundle right_bundle[N]: ['G, 'G+1] W; - - // get ax[0], ax[1], ... ax[N-1] for each iter of i - for j in 0..N { - left_bundle{j} = alpha_x{i}; - } - - // got x[0], x[1], ..., x[N-1] in one bundle to do the mult - for j in 0..N { - right_bundle{j} = x{j}; - } - - // do the vector mult, will give us row i of the resulting matrix - Mults2 := new MultipliersComb[W, N]<'G>(left_bundle{0..N}, right_bundle{0..N}); - vec_mult{i}{0..N} = Mults2.out{0..N}; - } - - for i in 0..N { - Adders1 := new Adders[W, N]<'G>(vec_mult{i}{0..N}, A{i}{0..N}); - A_out{i}{0..N} = Adders1.out{0..N}; - } - - -} \ No newline at end of file diff --git a/apps/blas/syr/syr-gen.fil b/apps/blas/syr/syr-gen.fil deleted file mode 100644 index 24977c36..00000000 --- a/apps/blas/syr/syr-gen.fil +++ /dev/null @@ -1,127 +0,0 @@ -import "apps/blas/util.fil"; - -// Computes alpha * x * x^T + A in one cycle, where: -// alpha is a scalar -// x is a vector of length N -// A is a symmetric matrix -// W: width of nums -// N: length of vector -comp Syr_Gen[W, N, MultsN]<'G:II>( - go: interface['G], - alpha: ['G, 'G+1] W, - x[N]: ['G, 'G+1] W, - A[N][N]: ['G, 'G+1] W -) -> ( - A_out[N][N]: ['G+L, 'G+L+1] W -) with { - some L where L > 0; - some II where II > 0; -} where W > 0, - N > 0, - N % MultsN == 0 -{ - assume Mults::L > mult_uses*Mults::II; - - let mult_uses = N / MultsN; - let add_uses = N / MultsN; - // in ['G, 'G+7]; - Mults := new Multipliers[W, MultsN] in ['G, 'G + Mults::L + (mult_uses*N*Mults::II)]; - Adds := new Adders[W, MultsN] in ['G + Mults::L + Mults::L, 'G + Mults::L + Mults::L + mult_uses*N]; - - bundle alpha_x[mult_uses][MultsN]: for ['G+k+Mults::L, 'G+k+Mults::L+1] W; - - // alpha * x stage - for i in 0..mult_uses { - let mult_start = i; - let mult_end = i + Mults::L; - - alpha_reg := new Shift[W, mult_start]<'G>(alpha); - bundle alpha_bundle[MultsN]: for ['G+mult_start, 'G+mult_start+1] W; - - if i == 0 { - for j in 0..MultsN { - alpha_bundle{j} = alpha; - } - m := Mults<'G+mult_start>(x{i*MultsN..(i+1)*MultsN}, alpha_bundle{0..MultsN}); - alpha_x{i}{0..MultsN} = m.out{0..MultsN}; - } else { - alpha_reg := new Shift[W, mult_start]<'G>(alpha); - for j in 0..MultsN { - alpha_bundle{j} = alpha_reg.out; - } - x_reg := new Shift[W, mult_start, MultsN]<'G>(x{i*MultsN..(i+1)*MultsN}); - m := Mults<'G+mult_start>(x_reg.out{0..MultsN}, alpha_bundle{0..MultsN}); - alpha_x{i}{0..MultsN} = m.out{0..MultsN}; - } - } - - // (alpha*x) * x^T stage - bundle vec_mult[N*mult_uses][MultsN]: for ['G+k+Mults::L + Mults::L, 'G+k+Mults::L +Mults::L+1] W; - - // each iteration corresponds to computing a row in resulting vector - for i in 0..N { - for j in 0..mult_uses { - let second_mult_start = j+Mults::L; // latency is 3, so first packet ready after 3 cycles - x_reg := new Shift[W, i*mult_uses+j+Mults::L, 1]<'G>(x{i}); - - bundle x_i[MultsN]: ['G+i*mult_uses+j+Mults::L, 'G+i*mult_uses+j+Mults::L+1] W; - for k in 0..MultsN { - x_i{k} = x_reg.out{0..1}; - } - - alpha_x_reg := new Shift[W, i*mult_uses+j+Mults::L-second_mult_start, MultsN]<'G+second_mult_start>(alpha_x{j}{0..MultsN}); - - mult := Mults<'G+i*mult_uses+j+Mults::L>(alpha_x_reg.out{0..MultsN}, x_i{0..MultsN}); - vec_mult{i*mult_uses+j}{0..MultsN} = mult.out{0..MultsN}; - } - } - - // N = 2 - // MultsN = 1 - // mult_uses = 2 - // ax0 happens at G+0, done at G+3 - // ax1 happens at G+1, done at G+4 - // ax0 * x0 happens at G+3, done at G+6 - // ax0 * x1 happens at G+4, done at G+7 - // ax1 * x0 happens at G+5, done at G+8 - // ax1 * x1 happens at G+6, done at G+9 - - // N = 2 - // MultsN = 2 - // mult_uses = 1 - // ax0 happens at G+0, done at G+3 - // ax1 happens at G+0, done at G+3 - // ax0 * x0 happens at G+3, done at G+6 - // ax0 * x1 happens at G+3, done at G+6 - // ax1 * x0 happens at G+4, done at G+7 - // ax1 * x1 happens at G+4, done at G+7 - - bundle matrix_add[N*mult_uses][MultsN]: for ['G+k+Mults::L+Mults::L, 'G+k+Mults::L+Mults::L+1] W; - - // add stage - for i in 0..N { - for j in 0..add_uses { - a_reg := new Shift[W, i*mult_uses+j+Mults::L + Mults::L, MultsN]<'G>(A{i}{j*MultsN..(j+1)*MultsN}); - add := Adds<'G+i*mult_uses+j+Mults::L + Mults::L>(vec_mult{i*mult_uses+j}{0..MultsN}, a_reg.out{0..MultsN}); - matrix_add{i*mult_uses+j}{0..MultsN} = add.out{0..MultsN}; - } - } - - // i j i*N+j - // 0 0 0 - // 0 1 1 - // 1 0 2 - // 1 1 3 - - // synchronize everything - let latency = Mults::L + Mults::L + N*mult_uses-1; - for i in 0..N { - for j in 0..add_uses { - reg := new Shift[W, latency - (i*mult_uses+j+Mults::L + Mults::L), MultsN]<'G+i*mult_uses+j+Mults::L + Mults::L>(matrix_add{i*mult_uses+j}{0..MultsN}); - A_out{i}{j*MultsN..(j+1)*MultsN} = reg.out{0..MultsN}; - } - } - - L := latency; - II := Mults::L + (mult_uses*N*Mults::II); -} \ No newline at end of file