From b691cb1f3997cfec50213ae430bed07cf9edaa8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Hamb=C3=BCchen?= Date: Tue, 12 May 2026 04:18:39 +0200 Subject: [PATCH 1/3] Document openblas_set_num_threads_local(). See #4425 --- cblas.h | 2 ++ driver/others/blas_server_omp.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cblas.h b/cblas.h index 8395f1b8b2..068fb34bde 100644 --- a/cblas.h +++ b/cblas.h @@ -40,6 +40,8 @@ extern "C" { /*Set the number of threads on runtime.*/ void openblas_set_num_threads(int num_threads); void goto_set_num_threads(int num_threads); +// "Local" means this number is used when OpenBLAS notices that +// it is already in an OpenMP parallel region (`omp_in_parallel()`). int openblas_set_num_threads_local(int num_threads); /*Get the number of threads on runtime.*/ diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 38b48fc842..a430f283fb 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -69,7 +69,7 @@ int blas_server_avail = 0; int blas_omp_number_max = 0; -int blas_omp_threads_local = 1; +int blas_omp_threads_local = 1; // num threads to use when already inside omp_in_parallel() extern int openblas_omp_adaptive_env(void); From 7e4e244b815947f248cec7a628020a8a32088b02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Hamb=C3=BCchen?= Date: Tue, 12 May 2026 05:18:55 +0200 Subject: [PATCH 2/3] Make OpenBLAS's usage of OpenMP respect openblas_set_num_threads(). Fixes #5806. Until now, the code in `num_cpu_avail()`, if (blas_cpu_number != openmp_nthreads) { goto_set_num_threads(openmp_nthreads); } would always reset the thread count to OpenMP's thread count, overriding whatever value the user had passed to openblas_set_num_threads().
--- common_thread.h | 9 +++++++++ driver/others/blas_server_omp.c | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/common_thread.h b/common_thread.h index 4a8db682bf..cefe881c7b 100644 --- a/common_thread.h +++ b/common_thread.h @@ -138,10 +138,19 @@ typedef struct blas_queue { extern int blas_server_avail; extern int blas_omp_number_max; extern int blas_omp_threads_local; +extern int blas_is_num_threads_set_explicitly; static __inline int num_cpu_avail(int level) { #ifdef USE_OPENMP + /* If the user explicitly called openblas_set_num_threads(), + respect that setting instead of overriding it with + `omp_get_max_threads()` below (which is to get a default + in case the user hasn't made an explicit choice). */ + if (blas_is_num_threads_set_explicitly) { + return blas_cpu_number; + } + int openmp_nthreads; openmp_nthreads=omp_get_max_threads(); if (omp_in_parallel()) openmp_nthreads = blas_omp_threads_local; diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index a430f283fb..1f6e35e184 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -70,6 +70,7 @@ int blas_server_avail = 0; int blas_omp_number_max = 0; int blas_omp_threads_local = 1; // num threads to use when already inside omp_in_parallel() +int blas_is_num_threads_set_explicitly = 0; // tracks whether the user called openblas_set_num_threads() extern int openblas_omp_adaptive_env(void); @@ -122,7 +123,7 @@ void goto_set_num_threads(int num_threads) { } void openblas_set_num_threads(int num_threads) { - + blas_is_num_threads_set_explicitly = 1; goto_set_num_threads(num_threads); } From d18385be3c3cc1b5939b1aa39f97135547cab14f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Hamb=C3=BCchen?= Date: Tue, 12 May 2026 05:49:32 +0200 Subject: [PATCH 3/3] Refactor: Simplify num_cpu_avail() syntax Note that this is not purely syntactic when USE_OPENMP is defined: the early `return 1` for `openmp_nthreads == 1` is removed, so that case now falls through to the `blas_cpu_number != openmp_nthreads` check and may call goto_set_num_threads(), which mutates `blas_cpu_number`. --- common_thread.h | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/common_thread.h
b/common_thread.h index cefe881c7b..3d34ebafe2 100644 --- a/common_thread.h +++ b/common_thread.h @@ -151,27 +151,17 @@ static __inline int num_cpu_avail(int level) { return blas_cpu_number; } -int openmp_nthreads; - openmp_nthreads=omp_get_max_threads(); - if (omp_in_parallel()) openmp_nthreads = blas_omp_threads_local; -#endif - -#ifndef USE_OPENMP - if (blas_cpu_number == 1 -#else - if (openmp_nthreads == 1 -#endif - ) return 1; + int openmp_nthreads = omp_in_parallel() ? blas_omp_threads_local : omp_get_max_threads(); -#ifdef USE_OPENMP - if (openmp_nthreads > blas_omp_number_max){ + if (openmp_nthreads > blas_omp_number_max) { #ifdef DEBUG - fprintf(stderr,"WARNING - more OpenMP threads requested (%d) than available (%d)\n",openmp_nthreads,blas_omp_number_max); + fprintf(stderr, "WARNING - more OpenMP threads requested (%d) than available (%d)\n", openmp_nthreads, blas_omp_number_max); #endif - openmp_nthreads = blas_omp_number_max; - } - if (blas_cpu_number != openmp_nthreads) { - goto_set_num_threads(openmp_nthreads); + openmp_nthreads = blas_omp_number_max; + } + + if (blas_cpu_number != openmp_nthreads) { + goto_set_num_threads(openmp_nthreads); // mutates `blas_cpu_number` } #endif