Skip to content

Commit

Permalink
Merge pull request #5088 from michalowski-arm/develop
Browse files Browse the repository at this point in the history
Add thread throttling profile for SGEMV on `NEOVERSEV1`
  • Loading branch information
martin-frbg authored Jan 24, 2025
2 parents 5930c16 + 838bb57 commit 9b11fd5
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 7 deletions.
7 changes: 5 additions & 2 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,10 @@ In chronological order:
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems

* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE

* Annop Wongwathanarat <[email protected]>
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1

* Marek Michalowski <https://github.com/michalowski-arm>
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
36 changes: 31 additions & 5 deletions interface/gemv.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,36 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
};
#endif

#ifdef DYNAMIC_ARCH
extern char* gotoblas_corename(void);
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
return
MN < 25600L ? 1
: MN < 63001L ? MIN(ncpu, 4)
: MN < 459684L ? MIN(ncpu, 16)
: ncpu;
}
#endif

static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
}
#endif

if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
return 1;
else
return num_cpu_avail(2);
}

#ifndef CBLAS

void NAME(char *TRANS, blasint *M, blasint *N,
Expand Down Expand Up @@ -225,11 +255,7 @@ void CNAME(enum CBLAS_ORDER order,
STACK_ALLOC(buffer_size, FLOAT, buffer);

#ifdef SMP

if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD )
nthreads = 1;
else
nthreads = num_cpu_avail(2);
nthreads = get_gemv_optimal_nthreads(1L * m * n);

if (nthreads == 1) {
#endif
Expand Down

0 comments on commit 9b11fd5

Please sign in to comment.