From b04ac31f6e60e9f114fe21a5f4b5e29f7d541252 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 8 Aug 2025 13:14:56 +0300 Subject: [PATCH] add level3 defaults for x86 On some x86 configurations, the build fails (see the error below). It seems that OpenBLAS supports many x86 configurations, each specific to a CPU family name. But if a build matches none of them (as happens on some x86 builds), some parameters are left undefined. Link: https://github.com/openwrt/packages/pull/27179#issuecomment-3163947279 ``` In file included from ../../common.h:586, from gemm3m.c:40: gemm3m_level3.c: In function 'cgemm3m_nn': ../../common_param.h:1435:33: error: 'CGEMM3M_DEFAULT_R' undeclared (first use in this function); did you mean 'CGEMM_DEFAULT_R'? 1435 | #define CGEMM3M_R CGEMM3M_DEFAULT_R | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1671:25: note: in expansion of macro 'CGEMM3M_R' 1671 | #define GEMM3M_R CGEMM3M_R | ^~~~~~~~~ gemm3m_level3.c:306:37: note: in expansion of macro 'GEMM3M_R' 306 | for(js = n_from; js < n_to; js += GEMM3M_R){ | ^~~~~~~~ ../../common_param.h:1435:33: note: each undeclared identifier is reported only once for each function it appears in 1435 | #define CGEMM3M_R CGEMM3M_DEFAULT_R | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1671:25: note: in expansion of macro 'CGEMM3M_R' 1671 | #define GEMM3M_R CGEMM3M_R | ^~~~~~~~~ gemm3m_level3.c:306:37: note: in expansion of macro 'GEMM3M_R' 306 | for(js = n_from; js < n_to; js += GEMM3M_R){ | ^~~~~~~~ ../../common_param.h:1434:33: error: 'CGEMM3M_DEFAULT_Q' undeclared (first use in this function); did you mean 'CGEMM_DEFAULT_Q'? 
1434 | #define CGEMM3M_Q CGEMM3M_DEFAULT_Q | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1661:25: note: in expansion of macro 'CGEMM3M_Q' 1661 | #define GEMM3M_Q CGEMM3M_Q | ^~~~~~~~~ gemm3m_level3.c:313:20: note: in expansion of macro 'GEMM3M_Q' 313 | if (min_l >= GEMM3M_Q * 2) { | ^~~~~~~~ i486-openwrt-linux-musl-gcc -Os -pipe -march=pentium-mmx -fno-caller-saves -fno-plt -fhonour-copts -ffile-prefix-map=/builder/build_dir/target-i386_pentium-mmx_musl/OpenBLAS-0.3.30=OpenBLAS-0.3.30 -Wformat -Werror=format-security -fstack-protector -D_FORTIFY_SOURCE=1 -Wl,-z,now -Wl,-z,relro -I/builder/staging_dir/toolchain-i386_pentium-mmx_gcc-14.3.0_musl/usr/include -I/builder/staging_dir/toolchain-i386_pentium-mmx_gcc-14.3.0_musl/include -I/builder/staging_dir/toolchain-i386_pentium-mmx_gcc-14.3.0_musl/include/fortify -DMAX_STACK_ALLOC=2048 -DEXPRECISION -m128bit-long-double -Wall -m32 -DF_INTERFACE_GFORT -fPIC -DC_LAPACK -DNO_LAPACK -DNO_LAPACKE -DNO_AVX -DNO_AVX512 -DSMP_SERVER -DNO_WARMUP -DMAX_CPU_NUMBER=2 -DMAX_PARALLEL_NUMBER=1 -DBUILD_SINGLE=1 -DBUILD_DOUBLE=1 -DBUILD_COMPLEX=1 -DBUILD_COMPLEX16=1 -DVERSION=\"0.3.30\" -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME -DASMNAME= -DASMFNAME=_ -DNAME=_ -DCNAME= -DCHAR_NAME=\"_\" -DCHAR_CNAME=\"\" -DNO_AFFINITY -I. -DMAX_STACK_ALLOC=2048 -DEXPRECISION -m128bit-long-double -Wall -m32 -DF_INTERFACE_GFORT -fPIC -DC_LAPACK -DNO_LAPACK -DNO_LAPACKE -DNO_AVX -DNO_AVX512 -DSMP_SERVER -DNO_WARMUP -DMAX_CPU_NUMBER=2 -DMAX_PARALLEL_NUMBER=1 -DBUILD_SINGLE=1 -DBUILD_DOUBLE=1 -DBUILD_COMPLEX=1 -DBUILD_COMPLEX16=1 -DVERSION=\"0.3.30\" -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME -DASMNAME=cgemm3m_cn -DASMFNAME=cgemm3m_cn_ -DNAME=cgemm3m_cn_ -DCNAME=cgemm3m_cn -DCHAR_NAME=\"cgemm3m_cn_\" -DCHAR_CNAME=\"cgemm3m_cn\" -DNO_AFFINITY -I../.. 
-UDOUBLE -DCOMPLEX -c -UDOUBLE -DCOMPLEX -DCN gemm3m.c -o cgemm3m_cn.o ../../common_param.h:1433:33: error: 'CGEMM3M_DEFAULT_P' undeclared (first use in this function); did you mean 'CGEMM_DEFAULT_P'? 1433 | #define CGEMM3M_P CGEMM3M_DEFAULT_P | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1651:25: note: in expansion of macro 'CGEMM3M_P' 1651 | #define GEMM3M_P CGEMM3M_P | ^~~~~~~~~ gemm3m_level3.c:325:20: note: in expansion of macro 'GEMM3M_P' 325 | if (min_i >= GEMM3M_P * 2) { | ^~~~~~~~ ../../common_param.h:1436:33: error: 'CGEMM3M_DEFAULT_UNROLL_M' undeclared (first use in this function); did you mean 'CGEMM3M_DEFAULT_UNROLL_N'? 1436 | #define CGEMM3M_UNROLL_M CGEMM3M_DEFAULT_UNROLL_M | ^~~~~~~~~~~~~~~~~~~~~~~~ ../../common_param.h:1580:25: note: in expansion of macro 'CGEMM3M_UNROLL_M' 1580 | #define GEMM3M_UNROLL_M CGEMM3M_UNROLL_M | ^~~~~~~~~~~~~~~~ gemm3m_level3.c:329:33: note: in expansion of macro 'GEMM3M_UNROLL_M' 329 | min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | ^~~~~~~~~~~~~~~ make[4]: *** [Makefile:1865: cgemm3m_nn.o] Error 1 make[4]: *** Waiting for unfinished jobs.... In file included from ../../common.h:586, from gemm3m.c:40: gemm3m_level3.c: In function 'cgemm3m_cn': ../../common_param.h:1435:33: error: 'CGEMM3M_DEFAULT_R' undeclared (first use in this function); did you mean 'CGEMM_DEFAULT_R'? 
1435 | #define CGEMM3M_R CGEMM3M_DEFAULT_R | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1671:25: note: in expansion of macro 'CGEMM3M_R' 1671 | #define GEMM3M_R CGEMM3M_R | ^~~~~~~~~ gemm3m_level3.c:306:37: note: in expansion of macro 'GEMM3M_R' 306 | for(js = n_from; js < n_to; js += GEMM3M_R){ | ^~~~~~~~ ../../common_param.h:1435:33: note: each undeclared identifier is reported only once for each function it appears in 1435 | #define CGEMM3M_R CGEMM3M_DEFAULT_R | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1671:25: note: in expansion of macro 'CGEMM3M_R' 1671 | #define GEMM3M_R CGEMM3M_R | ^~~~~~~~~ gemm3m_level3.c:306:37: note: in expansion of macro 'GEMM3M_R' 306 | for(js = n_from; js < n_to; js += GEMM3M_R){ | ^~~~~~~~ ../../common_param.h:1434:33: error: 'CGEMM3M_DEFAULT_Q' undeclared (first use in this function); did you mean 'CGEMM_DEFAULT_Q'? 1434 | #define CGEMM3M_Q CGEMM3M_DEFAULT_Q | ^~~~~~~~~~~~~~~~~ ../../common_param.h:1661:25: note: in expansion of macro 'CGEMM3M_Q' 1661 | #define GEMM3M_Q CGEMM3M_Q | ^~~~~~~~~ gemm3m_level3.c:313:20: note: in expansion of macro 'GEMM3M_Q' 313 | if (min_l >= GEMM3M_Q * 2) { | ^~~~~~~~ ``` --- /* NOTE(review): the hunk below adds fallback definitions to param.h for CGEMM3M_DEFAULT_R/UNROLL_M/P/Q and ZGEMM3M_DEFAULT_R/Q/P/UNROLL_M. The whole group is compiled only when ARCH_X86 is defined, and every macro sits in its own #ifndef guard, so a value that is already defined when this point is reached is kept as is. The CGEMM3M names are the ones the build log above reports as undeclared; the ZGEMM3M names do not appear in that log. The patch gives no rationale for the chosen numbers (12288, 320, 224, 8, 4) - TODO confirm them against the existing x86 entries in param.h. */ param.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/param.h b/param.h index 77b04fe433..d40856ab80 100644 --- a/param.h +++ b/param.h @@ -4274,5 +4274,38 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #define SHUFPS_39 shufps $0x39, #endif +#if defined(ARCH_X86) +#ifndef CGEMM3M_DEFAULT_R +#define CGEMM3M_DEFAULT_R 12288 +#endif + +#ifndef CGEMM3M_DEFAULT_UNROLL_M +#define CGEMM3M_DEFAULT_UNROLL_M 8 +#endif + +#ifndef CGEMM3M_DEFAULT_P +#define CGEMM3M_DEFAULT_P 320 +#endif + +#ifndef CGEMM3M_DEFAULT_Q +#define CGEMM3M_DEFAULT_Q 224 +#endif + +#ifndef ZGEMM3M_DEFAULT_R +#define ZGEMM3M_DEFAULT_R 12288 +#endif + +#ifndef ZGEMM3M_DEFAULT_Q +#define ZGEMM3M_DEFAULT_Q 224 +#endif + +#ifndef ZGEMM3M_DEFAULT_P +#define ZGEMM3M_DEFAULT_P 224 +#endif + +#ifndef 
ZGEMM3M_DEFAULT_UNROLL_M +#define ZGEMM3M_DEFAULT_UNROLL_M 4 +#endif +#endif #endif