From e8e78cfb0b8baef298511ac98c4b55d5fa6a1bbf Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sun, 21 Dec 2025 16:14:01 +0100 Subject: [PATCH 1/5] use nodes option and factor out common slurm config --- config/aws_citc.py | 96 ++------------------------------ config/aws_mc.py | 12 ++-- config/azure_mc.py | 12 ++-- config/bsc_marenostrum5.py | 25 +++------ config/it4i_karolina.py | 23 ++------ config/izum_vega.py | 24 ++------ config/macc_deucalion.py | 14 ++--- config/settings_example.py | 22 ++------ config/surf_snellius.py | 52 ++--------------- config/vsc_hortense.py | 75 ++----------------------- eessi/testsuite/common_config.py | 29 +++++++++- 11 files changed, 86 insertions(+), 298 deletions(-) diff --git a/config/aws_citc.py b/config/aws_citc.py index 0fc085ee..a056155f 100644 --- a/config/aws_citc.py +++ b/config/aws_citc.py @@ -12,9 +12,10 @@ import os -from eessi.testsuite.common_config import common_logging_config, common_eessi_init +from eessi.testsuite.common_config import common_eessi_init, common_logging_config, update_common_slurm_partition_config from eessi.testsuite.constants import FEATURES + # This config will write all staging, output and logging to subdirs under this prefix # Override with RFM_PREFIX environment variable reframe_prefix = os.path.join(os.environ['HOME'], 'reframe_runs') @@ -33,166 +34,76 @@ 'name': 'x86_64-haswell-8c-15gb', 'access': ['--constraint=shape=c4.2xlarge', '--export=NONE'], 'descr': 'Haswell, 8 cores, 15 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-haswell-16c-30gb', 'access': ['--constraint=shape=c4.4xlarge', '--export=NONE'], 'descr': 'Haswell, 16 cores, 30 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-zen2-8c-16gb', 'access': ['--constraint=shape=c5a.2xlarge', '--export=NONE'], 'descr': 'Zen2, 8 cores, 16 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-zen2-16c-32gb', 'access': ['--constraint=shape=c5a.4xlarge', '--export=NONE'], 'descr': 'Zen2, 16 cores, 32 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-zen3-8c-16gb', 'access': ['--constraint=shape=c6a.2xlarge', '--export=NONE'], 'descr': 'Zen3, 8 cores, 16 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'X86_64-zen3-16c-32gb', 'access': ['--constraint=shape=c6a.4xlarge', '--export=NONE'], 'descr': 'Zen3, 16 cores, 32 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-skylake-cascadelake-8c-16gb', 'access': ['--constraint=shape=c5.2xlarge', '--export=NONE'], 'descr': 'Skylake/Cascade lake, 8 cores, 16 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-skylake-cascadelake-16c-32gb', 'access': ['--constraint=shape=c5.4xlarge', '--export=NONE'], 'descr': 'Skylake/Cascade lake, 16 cores, 32 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-skylake-cascadelake-8c-16gb-nvme', 'access': ['--constraint=shape=c5d.2xlarge', '--export=NONE'], 'descr': 'Skylake/Cascade lake, 8 cores, 16 GiB, 200GB NVMe', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'x86_64-icelake-8c-16gb', 'access': ['--constraint=shape=c6i.2xlarge', '--export=NONE'], 'descr': 'Icelake, 8 cores, 16 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'aarch64-graviton2-8c-16gb', 'access': ['--constraint=shape=c6g.2xlarge', '--export=NONE'], 'descr': 'Graviton2, 8 cores, 16 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'aarch64-graviton2-16c-32gb', 'access': ['--constraint=shape=c6g.4xlarge', '--export=NONE'], 'descr': 'Graviton2, 16 cores, 32 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'aarch64-graviton2-32c-64gb', 'access': ['--constraint=shape=c6g.8xlarge', '--export=NONE'], 'descr': 'Graviton2, 32 cores, 64 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'aarch64-graviton3-8c-16gb', 'access': ['--constraint=shape=c7g.2xlarge', '--export=NONE'], 'descr': 'Graviton3, 8 cores, 16 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, { 'name': 'aarch64-graviton3-16c-32gb', 'access': ['--constraint=shape=c7g.4xlarge', '--export=NONE'], 'descr': 'Graviton3, 16 cores, 32 GiB', - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], }, ] }, @@ -233,3 +144,6 @@ for system in site_configuration['systems']: for partition in system['partitions']: partition.update(partition_defaults) + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/aws_mc.py b/config/aws_mc.py index 62643a52..eefe952c 100644 --- a/config/aws_mc.py +++ b/config/aws_mc.py @@ -12,7 +12,8 @@ import os -from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + update_common_slurm_partition_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -105,12 +106,6 @@ # steps inherit environment. It doesn't hurt to define this even if srun is not used 'export SLURM_EXPORT_ENV=ALL' ], - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'extras': { # Node types have somewhat varying amounts of memory, but we'll make it easy on ourselves # All should _at least_ have this amount (30GB * 1E9 / (1024*1024) = 28610 MiB) @@ -121,3 +116,6 @@ for system in site_configuration['systems']: for partition in system['partitions']: partition.update(partition_defaults) + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/azure_mc.py b/config/azure_mc.py index 6ddfd416..862b32d5 100644 --- a/config/azure_mc.py +++ b/config/azure_mc.py @@ -12,7 +12,8 @@ import os -from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + update_common_slurm_partition_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -100,14 +101,11 @@ 'features': [ FEATURES.CPU ] + list(SCALES.keys()), - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'max_jobs': 1, } for system in site_configuration['systems']: for partition in system['partitions']: partition.update(partition_defaults) + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/bsc_marenostrum5.py b/config/bsc_marenostrum5.py index d512f657..2bea7f78 100644 --- a/config/bsc_marenostrum5.py +++ b/config/bsc_marenostrum5.py @@ -1,10 +1,8 @@ import os -from eessi.testsuite.common_config import (common_logging_config, - common_general_config, - common_eessi_init, - get_sbatch_account) -from eessi.testsuite.constants import * +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + get_sbatch_account, update_common_slurm_partition_config) +from eessi.testsuite.constants import DEVICE_TYPES, EXTRAS, FEATURES, GPU_VENDORS, SCALES # Note that we rely on the SBATCH_ACCOUNT environment variable to be specified # From ReFrame 4.8.1 we can no longer rely on SBATCH_ACCOUNT completely @@ -43,10 +41,6 @@ ], 'environs': ['default'], 'max_jobs': 4, - 'resources': [ - # memory cannot be set on MareNostrum - # The test-suite will give warning which can be ignored - ], # list(SCALES.keys()) adds all the scales from eessi.testsuite.constants as valid for thi partition # Can be modified if not all scales can run on this partition, see e.g. the surf_snellius.py config 'features': [ @@ -78,14 +72,6 @@ ], 'environs': ['default'], 'max_jobs': 4, - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - # memory cannot be set on MareNostrum - # The test-suite will give warning which can be ignored - ], 'devices': [ { 'type': DEVICE_TYPES.GPU, @@ -124,3 +110,8 @@ } ], } + +# Set common Slurm config options +# memory cannot be set on MareNostrum +# The test-suite will give warning which can be ignored +update_common_slurm_partition_config(site_configuration, set_memory=False) diff --git a/config/it4i_karolina.py b/config/it4i_karolina.py index 03f00837..dd69b027 100644 --- a/config/it4i_karolina.py +++ b/config/it4i_karolina.py @@ -15,11 +15,9 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, - common_general_config, - common_logging_config, - get_sbatch_account) -from eessi.testsuite.constants import * # noqa: F403 +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + get_sbatch_account, update_common_slurm_partition_config) +from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix # Override with RFM_PREFIX environment variable @@ -68,12 +66,6 @@ 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available # per node @@ -107,12 +99,6 @@ # 'num_devices': 8, # } # ], - # 'resources': [ - # { - # 'name': '_rfm_gpu', - # 'options': ['--gpus-per-node={num_gpus_per_node}'], - # } - # ], # 'features': [ # FEATURES.GPU, # ] + list(SCALES.keys()), @@ -139,3 +125,6 @@ } ], } + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/izum_vega.py b/config/izum_vega.py index d6911959..2b80922e 100644 --- a/config/izum_vega.py +++ b/config/izum_vega.py @@ -15,8 +15,9 @@ import os -from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init -from eessi.testsuite.constants import * # noqa: F403 +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + update_common_slurm_partition_config) +from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES # This config will write all staging, output and logging to subdirs under this prefix # Override with RFM_PREFIX environment variable @@ -50,12 +51,6 @@ 'access': ['-p cpu', '--export=None'], 'environs': ['default'], 'max_jobs': 120, - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -92,16 +87,6 @@ 'num_devices': 4, } ], - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.GPU, ] + list(SCALES.keys()), @@ -134,3 +119,6 @@ } ], } + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/macc_deucalion.py b/config/macc_deucalion.py index 90d94c90..51c81d5d 100644 --- a/config/macc_deucalion.py +++ b/config/macc_deucalion.py @@ -1,7 +1,8 @@ import os -from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init -from eessi.testsuite.constants import * # noqa: F403 +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + update_common_slurm_partition_config) +from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix # Override with RFM_PREFIX environment variable @@ -34,12 +35,6 @@ 'access': ['-p normal-arm', '--export=None'], 'environs': ['default'], 'max_jobs': 120, - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -73,3 +68,6 @@ } ], } + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/settings_example.py b/config/settings_example.py index b9547b00..bbd22a3f 100644 --- a/config/settings_example.py +++ b/config/settings_example.py @@ -19,7 +19,8 @@ """ import os -from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + update_common_slurm_partition_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES, DEVICE_TYPES, GPU_VENDORS @@ -54,12 +55,6 @@ # 'num_cpus_per_socket': 64, # 'num_cpus_per_core': 1, # }, - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'extras': { # If you have slurm, check with scontrol show node for the amount of RealMemory # on nodes in this partition @@ -92,16 +87,6 @@ # 'num_cpus_per_socket': 36, # 'num_cpus_per_core': 1, # }, - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'devices': [ { 'type': DEVICE_TYPES.GPU, @@ -142,3 +127,6 @@ } ], } + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/surf_snellius.py b/config/surf_snellius.py index 2bdce9e8..0b839ea2 100644 --- a/config/surf_snellius.py +++ b/config/surf_snellius.py @@ -15,8 +15,9 @@ import os -from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init -from eessi.testsuite.constants import * # noqa: F403 +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + update_common_slurm_partition_config) +from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES # This config will write all staging, output and logging to subdirs under this prefix # Override with RFM_PREFIX environment variable @@ -43,18 +44,9 @@ 'scheduler': 'slurm', 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', - 'sched_options': { - 'use_nodes_option': True, - }, 'access': ['-p rome', '--export=None'], 'environs': ['default'], 'max_jobs': 120, - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -75,18 +67,9 @@ common_eessi_init() ], 'launcher': 'mpirun', - 'sched_options': { - 'use_nodes_option': True, - }, 'access': ['-p genoa', '--export=None'], 'environs': ['default'], 'max_jobs': 120, - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -102,9 +85,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', - 'sched_options': { - 'use_nodes_option': True, - }, 'access': ['-p gpu_a100', '--export=None'], 'environs': ['default'], 'max_jobs': 60, @@ -114,16 +94,6 @@ 'num_devices': 4, } ], - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.GPU, FEATURES.ALWAYS_REQUEST_GPUS, @@ -141,9 +111,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', - 'sched_options': { - 'use_nodes_option': True, - }, 'access': ['-p gpu_h100', '--export=None'], 'environs': ['default'], 'max_jobs': 60, @@ -153,16 +120,6 @@ 'num_devices': 4, } ], - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.GPU, FEATURES.ALWAYS_REQUEST_GPUS, @@ -197,3 +154,6 @@ } ], } + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 42fcfaf6..79cd5ac8 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -22,11 +22,9 @@ # ``` import os -from eessi.testsuite.common_config import (common_eessi_init, - common_general_config, - common_logging_config, - get_sbatch_account) -from eessi.testsuite.constants import * # noqa: F403 +from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, + get_sbatch_account, update_common_slurm_partition_config) +from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES hortense_access = ['--export=NONE', '--get-user-env=60L'] @@ -84,12 +82,6 @@ 'max_jobs': 20, 'launcher': launcher, 'modules': [mpi_module.format('cpu_rome')], - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -117,12 +109,6 @@ 'max_jobs': 20, 'launcher': launcher, 'modules': [mpi_module.format('cpu_rome_512')], - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -150,12 +136,6 @@ 'max_jobs': 20, 'launcher': launcher, 'modules': [mpi_module.format('cpu_milan_rhel9')], - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -183,12 +163,6 @@ 'max_jobs': 20, 'launcher': launcher, 'modules': [mpi_module.format('cpu_milan_rhel9')], - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'features': [ FEATURES.CPU, ] + list(SCALES.keys()), @@ -225,16 +199,6 @@ # per node EXTRAS.MEM_PER_NODE: 243840, # in MiB }, - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'devices': [ { 'type': DEVICE_TYPES.GPU, @@ -270,16 +234,6 @@ # per node EXTRAS.MEM_PER_NODE: 499680, # in MiB }, - 'resources': [ - { - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }, - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'devices': [ { 'type': DEVICE_TYPES.GPU, @@ -298,26 +252,6 @@ 'cxx': 'g++', 'ftn': 'gfortran', }, - { - 'name': 'foss-2021a', - 'cc': 'mpicc', - 'cxx': 'mpicxx', - 'ftn': 'mpif90', - 'modules': ['foss/2021a'] - }, - { - 'name': 'intel-2021a', - 'modules': ['intel'], - 'cc': 'mpiicc', - 'cxx': 'mpiicpc', - 'ftn': 'mpiifort', - }, - { - 'name': 'CUDA', - 'modules': ['CUDA'], - 'cc': 'nvcc', - 'cxx': 'nvcc', - }, ], 'general': [ { @@ -329,3 +263,6 @@ ], 'logging': common_logging_config(), } + +# Set common Slurm config options +update_common_slurm_partition_config(site_configuration) diff --git a/eessi/testsuite/common_config.py b/eessi/testsuite/common_config.py index 2989b85d..abe2f6ad 100644 --- a/eessi/testsuite/common_config.py +++ b/eessi/testsuite/common_config.py @@ -2,6 +2,8 @@ import reframe.core.logging as rflog +from eessi.testsuite.constants import FEATURES + perflog_format = '|'.join([ '%(check_job_completion_time)s', '%(osuser)s', @@ -32,6 +34,31 @@ ]) +def update_common_slurm_partition_config(site_configuration, set_memory=True): + """ + Update ReFrame configuration file: set common config options for partitions using Slurm. + This function must be called at the end of the site configuration file (after defining site_configuration) + :param site_configuration: site configuration dictionary + :param set_memory: set memory resources + """ + for system in site_configuration['systems']: + for partition in system['partitions']: + if partition['scheduler'] in ['slurm', 'squeue']: + # use --nodes option to ensure the exact number of nodes is requested + partition['sched_options'] = {'use_nodes_option': True} + partition['resources'] = partition.get('resources', []) + if set_memory: + partition['resources'] += [{ + 'name': 'memory', + 'options': ['--mem={size}'], + }] + if FEATURES.GPU in partition['features']: + partition['resources'] += [{ + 'name': '_rfm_gpu', + 'options': ['--gpus-per-node={num_gpus_per_node}'], + }] + + def common_logging_config(prefix=None): """ return default logging configuration as a list: stdout, file log, perflog @@ -74,7 +101,7 @@ def common_logging_config(prefix=None): def common_general_config(prefix=None): """ - return common configuration for the 'general' section of the ReFrame configuration file + Return common configuration for the 'general' section of the ReFrame configuration file :param prefix: prefix for the report_file """ prefix = os.getenv('RFM_PREFIX', prefix if prefix else '.') From 3e2ab97118948fa88fee2cbea645198f2a969c33 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sun, 21 Dec 2025 16:20:28 +0100 Subject: [PATCH 2/5] completely overwrite resources for now to avoid issues --- eessi/testsuite/common_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi/testsuite/common_config.py b/eessi/testsuite/common_config.py index abe2f6ad..f1065e1c 100644 --- a/eessi/testsuite/common_config.py +++ b/eessi/testsuite/common_config.py @@ -46,7 +46,7 @@ def update_common_slurm_partition_config(site_configuration, set_memory=True): if partition['scheduler'] in ['slurm', 'squeue']: # use --nodes option to ensure the exact number of nodes is requested partition['sched_options'] = {'use_nodes_option': True} - partition['resources'] = partition.get('resources', []) + partition['resources'] = [] if set_memory: partition['resources'] += [{ 'name': 'memory', From 9d0a926ce33b7cadeb9e5e6e85fbdf37e44e18dc Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Mon, 22 Dec 2025 15:25:18 +0100 Subject: [PATCH 3/5] factor out more common stuff --- config/aws_citc.py | 13 ++----------- config/aws_mc.py | 13 ++----------- config/azure_mc.py | 13 ++----------- config/bsc_marenostrum5.py | 14 ++------------ config/it4i_karolina.py | 14 ++------------ config/izum_vega.py | 14 ++------------ config/macc_deucalion.py | 13 ++----------- config/settings_example.py | 14 ++------------ config/surf_snellius.py | 16 ++-------------- config/vsc_hortense.py | 18 ++---------------- eessi/testsuite/common_config.py | 7 +++++-- 11 files changed, 25 insertions(+), 124 deletions(-) diff --git a/config/aws_citc.py b/config/aws_citc.py index a056155f..02d2a47b 100644 --- a/config/aws_citc.py +++ b/config/aws_citc.py @@ -12,7 +12,7 @@ import os -from eessi.testsuite.common_config import common_eessi_init, common_logging_config, update_common_slurm_partition_config +from eessi.testsuite.common_config import common_eessi_init, common_logging_config, set_common_required_config from eessi.testsuite.constants import FEATURES @@ -108,14 +108,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -130,7 +122,6 @@ partition_defaults = { 'scheduler': 'squeue', 'launcher': 'mpirun', - 'environs': ['default'], 'features': [ FEATURES.CPU ], @@ -146,4 +137,4 @@ partition.update(partition_defaults) # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/aws_mc.py b/config/aws_mc.py index eefe952c..de7c30ae 100644 --- a/config/aws_mc.py +++ b/config/aws_mc.py @@ -13,7 +13,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - update_common_slurm_partition_config) + set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -73,14 +73,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -96,7 +88,6 @@ partition_defaults = { 'scheduler': 'slurm', 'launcher': 'mpirun', - 'environs': ['default'], 'features': [ FEATURES.CPU ] + list(SCALES.keys()), @@ -118,4 +109,4 @@ partition.update(partition_defaults) # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/azure_mc.py b/config/azure_mc.py index 862b32d5..9a712110 100644 --- a/config/azure_mc.py +++ b/config/azure_mc.py @@ -13,7 +13,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - update_common_slurm_partition_config) + set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -74,14 +74,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -97,7 +89,6 @@ partition_defaults = { 'scheduler': 'slurm', 'launcher': 'mpirun', - 'environs': ['default'], 'features': [ FEATURES.CPU ] + list(SCALES.keys()), @@ -108,4 +99,4 @@ partition.update(partition_defaults) # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/bsc_marenostrum5.py b/config/bsc_marenostrum5.py index 2bea7f78..f8bb68e5 100644 --- a/config/bsc_marenostrum5.py +++ b/config/bsc_marenostrum5.py @@ -1,7 +1,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - get_sbatch_account, update_common_slurm_partition_config) + get_sbatch_account, set_common_required_config) from eessi.testsuite.constants import DEVICE_TYPES, EXTRAS, FEATURES, GPU_VENDORS, SCALES # Note that we rely on the SBATCH_ACCOUNT environment variable to be specified @@ -39,7 +39,6 @@ 'export OMPI_MCA_mtl="^ofi"', 'export OMPI_MCA_btl="^ofi"', ], - 'environs': ['default'], 'max_jobs': 4, # list(SCALES.keys()) adds all the scales from eessi.testsuite.constants as valid for thi partition # Can be modified if not all scales can run on this partition, see e.g. the surf_snellius.py config @@ -70,7 +69,6 @@ 'export OMPI_MCA_mtl="^ofi"', 'export OMPI_MCA_btl="^ofi"', ], - 'environs': ['default'], 'max_jobs': 4, 'devices': [ { @@ -92,14 +90,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(), 'general': [ { @@ -114,4 +104,4 @@ # Set common Slurm config options # memory cannot be set on MareNostrum # The test-suite will give warning which can be ignored -update_common_slurm_partition_config(site_configuration, set_memory=False) +set_common_required_config(site_configuration, set_memory=False) diff --git a/config/it4i_karolina.py b/config/it4i_karolina.py index dd69b027..f0e32204 100644 --- a/config/it4i_karolina.py +++ b/config/it4i_karolina.py @@ -16,7 +16,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - get_sbatch_account, update_common_slurm_partition_config) + get_sbatch_account, set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -61,7 +61,6 @@ # Use --export=None to avoid that login environment is passed down to submitted jobs # Note that we rely on the SBATCH_ACCOUNT environment variable to be specified 'access': [f'-A {sbatch_account}', '-p qcpu', '--export=None'], - 'environs': ['default'], 'max_jobs': 120, 'features': [ FEATURES.CPU, @@ -91,7 +90,6 @@ # # Use --export=None to avoid that login environment is passed down to submitted jobs # # Note that we rely on the SBATCH_ACCOUNT environment variable to be specified # 'access': ['-p gpu', '--export=None'], - # 'environs': ['default'], # 'max_jobs': 60, # 'devices': [ # { @@ -107,14 +105,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -127,4 +117,4 @@ } # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/izum_vega.py b/config/izum_vega.py index 2b80922e..09057afd 100644 --- a/config/izum_vega.py +++ b/config/izum_vega.py @@ -16,7 +16,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - update_common_slurm_partition_config) + set_common_required_config) from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -49,7 +49,6 @@ 'launcher': 'mpirun', # Use --export=None to avoid that login environment is passed down to submitted jobs 'access': ['-p cpu', '--export=None'], - 'environs': ['default'], 'max_jobs': 120, 'features': [ FEATURES.CPU, @@ -79,7 +78,6 @@ 'launcher': 'mpirun', # Use --export=None to avoid that login environment is passed down to submitted jobs 'access': ['-p gpu', '--export=None'], - 'environs': ['default'], 'max_jobs': 60, 'devices': [ { @@ -101,14 +99,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -121,4 +111,4 @@ } # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/macc_deucalion.py b/config/macc_deucalion.py index 51c81d5d..a81b27f2 100644 --- a/config/macc_deucalion.py +++ b/config/macc_deucalion.py @@ -1,7 +1,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - update_common_slurm_partition_config) + set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -33,7 +33,6 @@ 'launcher': 'mpirun', # Use --export=None to avoid that login environment is passed down to submitted jobs 'access': ['-p normal-arm', '--export=None'], - 'environs': ['default'], 'max_jobs': 120, 'features': [ FEATURES.CPU, @@ -50,14 +49,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -70,4 +61,4 @@ } # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/settings_example.py b/config/settings_example.py index bbd22a3f..4057bd67 100644 --- a/config/settings_example.py +++ b/config/settings_example.py @@ -20,7 +20,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - update_common_slurm_partition_config) + set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES, DEVICE_TYPES, GPU_VENDORS @@ -45,7 +45,6 @@ # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=ALL', ], - 'environs': ['default'], 'max_jobs': 4, # We recommend to rely on ReFrame's CPU autodetection, # and only define the 'processor' field if autodetection fails @@ -77,7 +76,6 @@ # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=ALL', ], - 'environs': ['default'], 'max_jobs': 4, # We recommend to rely on ReFrame's CPU autodetection, # and only define the 'processor' field if autodetection fails @@ -109,14 +107,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(), 'general': [ { @@ -129,4 +119,4 @@ } # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/surf_snellius.py b/config/surf_snellius.py index 0b839ea2..3df247f7 100644 --- a/config/surf_snellius.py +++ b/config/surf_snellius.py @@ -16,7 +16,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - update_common_slurm_partition_config) + set_common_required_config) from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES # This config will write all staging, output and logging to subdirs under this prefix @@ -45,7 +45,6 @@ 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', 'access': ['-p rome', '--export=None'], - 'environs': ['default'], 'max_jobs': 120, 'features': [ FEATURES.CPU, @@ -68,7 +67,6 @@ ], 'launcher': 'mpirun', 'access': ['-p genoa', '--export=None'], - 'environs': ['default'], 'max_jobs': 120, 'features': [ FEATURES.CPU, @@ -86,7 +84,6 @@ 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', 'access': ['-p gpu_a100', '--export=None'], - 'environs': ['default'], 'max_jobs': 60, 'devices': [ { @@ -112,7 +109,6 @@ 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', 'access': ['-p gpu_h100', '--export=None'], - 'environs': ['default'], 'max_jobs': 60, 'devices': [ { @@ -136,14 +132,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '', - }, - ], 'logging': common_logging_config(reframe_prefix), 'general': [ { @@ -156,4 +144,4 @@ } # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 79cd5ac8..877a4da5 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -23,7 +23,7 @@ import os from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, - get_sbatch_account, update_common_slurm_partition_config) + get_sbatch_account, set_common_required_config) from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES hortense_access = ['--export=NONE', '--get-user-env=60L'] @@ -77,7 +77,6 @@ 'sched_options': { 'sched_access_in_submit': True, }, - 'environs': ['default'], 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, @@ -104,7 +103,6 @@ 'sched_options': { 'sched_access_in_submit': True, }, - 'environs': ['default'], 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, 'launcher': launcher, @@ -131,7 +129,6 @@ 'sched_options': { 'sched_access_in_submit': True, }, - 'environs': ['default'], 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, @@ -158,7 +155,6 @@ 'sched_options': { 'sched_access_in_submit': True, }, - 'environs': ['default'], 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, @@ -185,7 +181,6 @@ 'sched_options': { 'sched_access_in_submit': True, }, - 'environs': ['default'], 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, 'launcher': launcher, @@ -220,7 +215,6 @@ 'sched_options': { 'sched_access_in_submit': True, }, - 'environs': ['default'], 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, 'launcher': launcher, @@ -245,14 +239,6 @@ ] }, ], - 'environments': [ - { - 'name': 'default', - 'cc': 'gcc', - 'cxx': 'g++', - 'ftn': 'gfortran', - }, - ], 'general': [ { 'remote_detect': True, @@ -265,4 +251,4 @@ } # Set common Slurm config options -update_common_slurm_partition_config(site_configuration) +set_common_required_config(site_configuration) diff --git a/eessi/testsuite/common_config.py b/eessi/testsuite/common_config.py index f1065e1c..88c6eca6 100644 --- a/eessi/testsuite/common_config.py +++ b/eessi/testsuite/common_config.py @@ -34,15 +34,18 @@ ]) -def update_common_slurm_partition_config(site_configuration, set_memory=True): +def set_common_required_config(site_configuration, set_memory=True): """ - Update ReFrame configuration file: set common config options for partitions using Slurm. + Update ReFrame configuration file: set common required config options This function must be called at the end of the site configuration file (after defining site_configuration) :param site_configuration: site configuration dictionary :param set_memory: set memory resources """ + site_configuration.update({'environments': [{'name': 'default'}]}) + for system in site_configuration['systems']: for partition in system['partitions']: + partition.update({'environs': ['default']}) if partition['scheduler'] in ['slurm', 'squeue']: # use --nodes option to ensure the exact number of nodes is requested partition['sched_options'] = {'use_nodes_option': True} From af76af660d86a3a7495c1c5a72557b71917696e5 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Mon, 22 Dec 2025 15:37:46 +0100 Subject: [PATCH 4/5] also update github_actions.py --- config/github_actions.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/config/github_actions.py b/config/github_actions.py index 390a54c4..adb92e3e 100644 --- a/config/github_actions.py +++ b/config/github_actions.py @@ -1,7 +1,7 @@ # ReFrame configuration file that can be used in GitHub Actions with EESSI -from eessi.testsuite.common_config import common_logging_config -from eessi.testsuite.constants import * +from eessi.testsuite.common_config import common_logging_config, set_common_required_config +from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES site_configuration = { @@ -22,12 +22,6 @@ 'num_cpus': 2, 'num_cpus_per_core': 1, }, - 'resources': [ - { - 'name': 'memory', - 'options': ['--mem={size}'], - } - ], 'max_jobs': 1, 'extras': { # Make sure to round down, otherwise a job might ask for more mem than is available @@ -39,14 +33,6 @@ ] } ], - 'environments': [ - { - 'name': 'default', - 'cc': 'cc', - 'cxx': '', - 'ftn': '' - } - ], 'general': [ { 'purge_environment': True, @@ -55,3 +41,6 @@ ], 'logging': common_logging_config(), } + +# Set common Slurm config options +set_common_required_config(site_configuration) From aedce552272e7b7f183f174eefd75c0d127dfe3e Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Fri, 26 Dec 2025 17:32:18 +0100 Subject: [PATCH 5/5] allow other sched_options; notify when changing site config --- eessi/testsuite/common_config.py | 61 ++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/eessi/testsuite/common_config.py b/eessi/testsuite/common_config.py index 88c6eca6..16926e11 100644 --- a/eessi/testsuite/common_config.py +++ b/eessi/testsuite/common_config.py @@ -1,6 +1,7 @@ +import json import os -import reframe.core.logging as rflog +from reframe.core.logging import getlogger from eessi.testsuite.constants import FEATURES @@ -41,25 +42,55 @@ def set_common_required_config(site_configuration, set_memory=True): :param site_configuration: site configuration dictionary :param set_memory: set memory resources """ - site_configuration.update({'environments': [{'name': 'default'}]}) + environments = [{'name': 'default'}] + environs = ['default'] + use_nodes_option = True + resources_memory = [{ + 'name': 'memory', + 'options': ['--mem={size}'], + }] + resources_gpu = [{ + 'name': '_rfm_gpu', + 'options': ['--gpus-per-node={num_gpus_per_node}'], + }] + + if site_configuration.get('environments') and site_configuration['environments'] != environments: + getlogger().info(f"Changing environments in site config to {environments}") + site_configuration['environments'] = environments for system in site_configuration['systems']: for partition in system['partitions']: - partition.update({'environs': ['default']}) + if partition.get('environs') and partition['environs'] != environs: + getlogger().info( + f"Changing environs in site config to {environs} for {system['name']}:{partition['name']}") + partition['environs'] = environs if partition['scheduler'] in ['slurm', 'squeue']: # use --nodes option to ensure the exact number of nodes is requested - partition['sched_options'] = {'use_nodes_option': True} - partition['resources'] = [] - if set_memory: - partition['resources'] += [{ - 'name': 'memory', - 'options': ['--mem={size}'], - }] + if ( + partition.get('sched_options') + and partition['sched_options'].get('use_nodes_option', use_nodes_option) is not use_nodes_option + ): + getlogger().info(' '.join([ + "Changing sched_options['use_nodes_option'] in site config to", + f"{use_nodes_option} for {system['name']}:{partition['name']}", + ])) + if partition.get('sched_options'): + partition['sched_options']['use_nodes_option'] = use_nodes_option + else: + partition['sched_options'] = {'use_nodes_option': use_nodes_option} if FEATURES.GPU in partition['features']: - partition['resources'] += [{ - 'name': '_rfm_gpu', - 'options': ['--gpus-per-node={num_gpus_per_node}'], - }] + resources = resources_memory + resources_gpu + else: + resources = resources_memory + if partition.get('resources'): + orig = {json.dumps(x, sort_keys=True) for x in partition['resources']} + new = {json.dumps(x, sort_keys=True) for x in resources} + if orig != new: + getlogger().info(' '.join([ + f"Changing resources in site config to {resources}", + f"for {system['name']}:{partition['name']}", + ])) + partition['resources'] = resources def common_logging_config(prefix=None): @@ -128,7 +159,7 @@ def common_eessi_init(eessi_version=None): eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None) if eessi_cvmfs_repo is None: - rflog.getlogger().warning(' '.join([ + getlogger().warning(' '.join([ "Environment variable 'EESSI_CVMFS_REPO' is not defined.", "If you plan to use the EESSI software stack,", "make sure to initialize the EESSI environment before running the test suite.",