Skip to content

Commit 4bb9754

Browse files
vtjnash and claude committed
cmake: document options and set defaults for influential variables
Add documentation strings and defaults for CMake cache variables including TARGET, BINARY, threading options, vector extensions, memory tuning, and precision types. Treat empty string values as "not set" to allow auto-detection while keeping options visible in ccmake.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent c7b0304 commit 4bb9754

File tree

2 files changed

+92
-19
lines changed

2 files changed

+92
-19
lines changed

CMakeLists.txt

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,61 @@ set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in
7878

7979
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
8080

81+
# Target architecture - auto-detected if not specified
82+
set(TARGET "" CACHE STRING "Target CPU architecture (e.g. HASWELL, SANDYBRIDGE, NEHALEM, ARMV8, POWER9). Auto-detected if not specified.")
83+
84+
# Binary type (32-bit or 64-bit)
85+
set(BINARY "" CACHE STRING "Build a 32-bit or 64-bit library (32 or 64). Auto-detected if not specified. Note: 32-bit disables AVX.")
86+
87+
# Threading options
88+
set(USE_THREAD "" CACHE STRING "Enable multi-threading (0=disabled, 1=enabled). Auto-detected based on NUM_THREADS if not specified.")
89+
option(USE_OPENMP "Use OpenMP for threading instead of pthreads" OFF)
90+
set(NUM_THREADS "" CACHE STRING "Maximum number of threads. Auto-detected from CPU cores if not specified.")
91+
set(NUM_PARALLEL "1" CACHE STRING "Number of parallel OpenBLAS instances when using OpenMP (default: 1)")
92+
93+
# 64-bit integer interface
94+
option(INTERFACE64 "Use 64-bit integers for array indices (equivalent to -i8 in ifort)" OFF)
95+
96+
# Vector extension control
97+
option(NO_AVX "Disable AVX kernel support (use for compatibility with older systems)" OFF)
98+
option(NO_AVX2 "Disable AVX2 optimizations" OFF)
99+
option(NO_AVX512 "Disable AVX512 optimizations" OFF)
100+
101+
# Memory tuning options
102+
set(BUFFERSIZE "" CACHE STRING "Memory buffer size factor (32<<n bytes, default: architecture-dependent, typically 25)")
103+
set(MAX_STACK_ALLOC "2048" CACHE STRING "Maximum stack allocation in bytes (0 to disable, may reduce GER/GEMV performance)")
104+
set(BLAS3_MEM_ALLOC_THRESHOLD "32" CACHE STRING "Thread count threshold for heap allocation of job arrays (default: 32)")
105+
set(GEMM_MULTITHREAD_THRESHOLD "4" CACHE STRING "Threshold below which GEMM runs single-threaded (default: 4)")
106+
107+
# Threading implementation options
108+
option(USE_SIMPLE_THREADED_LEVEL3 "Use legacy threaded Level 3 implementation" OFF)
109+
option(USE_TLS "Use thread-local storage instead of central memory buffer (requires glibc 2.21+)" OFF)
110+
option(CONSISTENT_FPCSR "Synchronize floating-point CSR between threads (x86/x86_64/aarch64 only)" OFF)
111+
112+
# System configuration
113+
option(BIGNUMA "Support systems with more than 16 NUMA nodes or more than 256 CPUs (Linux only)" OFF)
114+
option(EMBEDDED "Build for embedded/bare-metal systems (requires custom malloc/free)" OFF)
115+
116+
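# Precision type options. If BUILD_SINGLE, BUILD_DOUBLE, BUILD_COMPLEX and BUILD_COMPLEX16 are all OFF, the standard precisions are enabled by default; BFLOAT16 and HFLOAT16 are built only when explicitly turned ON.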
117+
option(BUILD_SINGLE "Build single precision (REAL) functions" OFF)
118+
option(BUILD_DOUBLE "Build double precision (DOUBLE PRECISION) functions" OFF)
119+
option(BUILD_COMPLEX "Build complex (COMPLEX) functions" OFF)
120+
option(BUILD_COMPLEX16 "Build double complex (COMPLEX*16) functions" OFF)
121+
option(BUILD_BFLOAT16 "Build experimental BFLOAT16 functions" OFF)
122+
option(BUILD_HFLOAT16 "Build experimental HFLOAT16 functions" OFF)
123+
124+
# CBLAS-only mode
125+
option(ONLY_CBLAS "Build only CBLAS interface (no Fortran BLAS, implies NO_LAPACK)" OFF)
126+
127+
# Profiling and debugging
128+
option(FUNCTION_PROFILE "Enable function-level performance profiling" OFF)
129+
option(SANITY_CHECK "Compare results against reference BLAS (slow, for testing only)" OFF)
130+
131+
# Memory allocation methods
132+
option(HUGETLB_ALLOCATION "Use huge pages for thread buffers via shared memory" OFF)
133+
set(HUGETLBFILE_ALLOCATION "" CACHE STRING "Path to hugetlbfs mount for huge page allocation (e.g. /hugepages)")
134+
option(DEVICEDRIVER_ALLOCATION "Use device driver for physically contiguous memory allocation" OFF)
135+
81136
if (CMAKE_SYSTEM_NAME MATCHES "Windows" AND BUILD_SHARED_LIBS AND NOT ("${SYMBOLPREFIX}${SYMBOLSUFFIX}" STREQUAL ""))
82137
set (DELETE_STATIC_LIBS "")
83138
if (NOT BUILD_STATIC_LIBS)
@@ -137,7 +192,7 @@ if (NOT DYNAMIC_ARCH)
137192
list(APPEND BLASDIRS kernel)
138193
endif ()
139194

140-
if (DEFINED SANITY_CHECK)
195+
if (SANITY_CHECK)
141196
list(APPEND BLASDIRS reference)
142197
endif ()
143198

@@ -149,16 +204,10 @@ if (NOT NO_LAPACK)
149204
list(APPEND SUBDIRS lapack)
150205
endif ()
151206

152-
if (NOT DEFINED BUILD_BFLOAT16)
153-
set (BUILD_BFLOAT16 false)
154-
endif ()
155-
if (NOT DEFINED BUILD_HFLOAT16)
156-
set (BUILD_HFLOAT16 false)
157-
endif ()
158207
# set which float types we want to build for
159-
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
160-
# if none are defined, build for all
161-
# set(BUILD_BFLOAT16 true)
208+
# If none of BUILD_SINGLE, BUILD_DOUBLE, BUILD_COMPLEX or BUILD_COMPLEX16 is ON, enable the standard precisions (BFLOAT16 and HFLOAT16 remain opt-in)
209+
if (NOT BUILD_SINGLE AND NOT BUILD_DOUBLE AND NOT BUILD_COMPLEX AND NOT BUILD_COMPLEX16)
210+
# if none are enabled, build for all
162211
set(BUILD_SINGLE true)
163212
set(BUILD_DOUBLE true)
164213
set(BUILD_COMPLEX true)

cmake/system.cmake

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,44 @@ set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
3434
# System detection, via CMake.
3535
include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake")
3636

37-
if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET)
37+
# Handle cache options that should be auto-detected if empty
38+
# We keep the cache entry visible in ccmake but treat empty values as "not set"
39+
set(_TARGET_SET FALSE)
40+
if (DEFINED TARGET AND NOT "${TARGET}" STREQUAL "")
41+
set(_TARGET_SET TRUE)
42+
endif()
43+
44+
if(CMAKE_CROSSCOMPILING AND NOT _TARGET_SET)
3845
# Detect target without running getarch
3946
if (ARM64)
4047
set(TARGET "ARMV8")
48+
set(_TARGET_SET TRUE)
4149
elseif(ARM)
4250
set(TARGET "ARMV7") # TODO: Ask compiler which arch this is
51+
set(_TARGET_SET TRUE)
4352
else()
4453
message(FATAL_ERROR "When cross compiling, a TARGET is required.")
4554
endif()
4655
endif()
4756

57+
set(_BINARY_SET FALSE)
58+
if (DEFINED BINARY AND NOT "${BINARY}" STREQUAL "")
59+
set(_BINARY_SET TRUE)
60+
endif()
61+
62+
set(_USE_THREAD_SET FALSE)
63+
if (DEFINED USE_THREAD AND NOT "${USE_THREAD}" STREQUAL "")
64+
set(_USE_THREAD_SET TRUE)
65+
endif()
66+
67+
set(_NUM_THREADS_SET FALSE)
68+
if (DEFINED NUM_THREADS AND NOT "${NUM_THREADS}" STREQUAL "")
69+
set(_NUM_THREADS_SET TRUE)
70+
endif()
71+
4872
# Other files expect CORE, which is actually TARGET and will become TARGET_CORE for kernel build. Confused yet?
4973
# It seems we are meant to use TARGET as input and CORE internally as kernel.
50-
if(NOT DEFINED CORE AND DEFINED TARGET)
74+
if(NOT DEFINED CORE AND _TARGET_SET)
5175
# Map legacy TARGET names onto the CORE names used internally.
# The expansion is quoted so the value is compared literally and is never
# re-interpreted by if() as the name of another variable.
if ("${TARGET}" STREQUAL "LOONGSON3R5")
5276
set(CORE "LA464")
5377
# Quoted expansion: compare the TARGET value literally (no second dereference).
elseif ("${TARGET}" STREQUAL "LOONGSON2K1000")
@@ -65,7 +89,7 @@ if (DEFINED TARGET_CORE)
6589
endif ()
6690

6791
# Force fallbacks for 32bit
68-
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
92+
if (_BINARY_SET AND _TARGET_SET AND BINARY EQUAL 32)
6993
message(STATUS "Compiling a ${BINARY}-bit binary.")
7094
set(NO_AVX 1)
7195
# AVX-era x86 targets handled specially for 32-bit builds. Each expansion is
# quoted so the TARGET value is compared as a literal string and if() never
# dereferences it a second time as a variable name.
if ("${TARGET}" STREQUAL "HASWELL" OR "${TARGET}" STREQUAL "SANDYBRIDGE" OR "${TARGET}" STREQUAL "SKYLAKEX" OR "${TARGET}" STREQUAL "COOPERLAKE" OR "${TARGET}" STREQUAL "SAPPHIRERAPIDS")
@@ -83,7 +107,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
83107
endif ()
84108

85109

86-
if (DEFINED TARGET)
110+
if (_TARGET_SET)
87111
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
88112
message(STATUS "Targeting the ${TARGET} architecture.")
89113
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
@@ -172,7 +196,7 @@ if (NOT DEFINED NUM_PARALLEL)
172196
set(NUM_PARALLEL 1)
173197
endif()
174198

175-
if (NOT DEFINED NUM_THREADS)
199+
if (NOT _NUM_THREADS_SET)
176200
if (DEFINED NUM_CORES AND NOT NUM_CORES EQUAL 0)
177201
# HT?
178202
set(NUM_THREADS ${NUM_CORES})
@@ -183,7 +207,7 @@ endif()
183207

184208
if (${NUM_THREADS} LESS 2)
185209
set(USE_THREAD 0)
186-
elseif(NOT DEFINED USE_THREAD)
210+
elseif(NOT _USE_THREAD_SET)
187211
set(USE_THREAD 1)
188212
endif ()
189213

@@ -202,8 +226,8 @@ if (C_LAPACK)
202226
endif ()
203227

204228
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
205-
if (DEFINED TARGET)
206-
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
229+
if (DEFINED TARGET AND NOT "${TARGET}" STREQUAL "")
230+
# Both sides of the comparison are quoted so they are treated the same way:
# an unquoted ${TARGET} would be dereferenced again by if() if a variable
# named after the target (e.g. COOPERLAKE) happened to be defined.
if ("${TARGET}" STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
207231
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
208232
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 10.09)
209233
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
@@ -683,7 +707,7 @@ if (HUGETLB_ALLOCATION)
683707
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
684708
endif ()
685709

686-
if (DEFINED HUGETLBFILE_ALLOCATION)
710+
if (DEFINED HUGETLBFILE_ALLOCATION AND NOT "${HUGETLBFILE_ALLOCATION}" STREQUAL "")
687711
# Pass the hugetlbfs path to the C sources as HUGETLB_FILE_NAME.
# (A stray ")" used to sit inside the quoted string and was appended to the
# macro value on the compiler command line.)
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
688712
endif ()
689713

0 commit comments

Comments
 (0)