Skip to content

Commit

Permalink
Merge pull request #5127 from Harishmcw/gesv-threshold
Browse files (browse the repository at this point in the history)
Refined GESV Parallelization Logic for Windows on ARM64
  • Loading branch information
martin-frbg authored Feb 12, 2025
2 parents 453efbd + daf16b8 commit a64b75a
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions interface/lapack/gesv.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,33 @@ int NAME(blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, blasint *ipiv,

#ifndef PPC440
buffer = (FLOAT *)blas_memory_alloc(1);

sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

#ifdef SMP
args.common = NULL;
#ifndef DOUBLE
if (args.m*args.n < 40000)

#if defined(_WIN64) && defined(_M_ARM64)
#ifdef COMPLEX
if (args.m * args.n > 600)
#else
if (args.m * args.n > 1000)
#endif
args.nthreads = num_cpu_avail(4);
else
args.nthreads = 1;
#else
if (args.m*args.n < 10000)
#ifndef DOUBLE
if (args.m * args.n < 40000)
#else
if (args.m * args.n < 10000)
#endif
args.nthreads = 1;
else
args.nthreads = num_cpu_avail(4);
#endif
args.nthreads=1;
else
args.nthreads = num_cpu_avail(4);

if (args.nthreads == 1) {
#endif
Expand Down

0 comments on commit a64b75a

Please sign in to comment.