Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/integration/defs/perf/disagg/execution/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ def submit_test_job(test_config) -> tuple:

# Write temporary config file with replaced environment variables
logger.info(f"Creating temporary config: {temp_config_path}")

with open(temp_config_path, "w") as f:
yaml.dump(
test_config.config_data,
Expand Down
54 changes: 29 additions & 25 deletions tests/integration/defs/perf/disagg/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,42 @@

import os

# GPU resource configuration - centralized config for all GPU-specific parameters.
#
# Keys of each per-GPU entry:
#   slurm_extra_args        - SLURM extra arguments (empty string if not required)
#   set_segment             - boolean flag, presumably controls whether a SLURM
#                             segment is requested; verify against the consumer
#   lock_freq_graphics_mhz  - GPU graphics clock lock frequency (MHz);
#                             None means no lock frequency has been set yet
#   lock_freq_memory_mhz    - GPU memory clock lock frequency (MHz);
#                             None means no lock frequency has been set yet
GPU_RESOURCE_CONFIG = {
    "GB200": {  # OCI GB200
        "slurm_extra_args": "--gres=gpu:4",
        "set_segment": True,
        "lock_freq_graphics_mhz": 2062,
        "lock_freq_memory_mhz": 3996,
    },
    "GB200_LYRIS": {  # Lyris GB200
        "slurm_extra_args": "",
        "set_segment": True,
        "lock_freq_graphics_mhz": None,
        "lock_freq_memory_mhz": None,
    },
    "GB300": {  # Lyris GB300
        "slurm_extra_args": "",
        "set_segment": True,
        "lock_freq_graphics_mhz": None,
        "lock_freq_memory_mhz": None,
    },
    "H100": {
        "slurm_extra_args": "",
        "set_segment": False,
        "lock_freq_graphics_mhz": None,
        "lock_freq_memory_mhz": None,
    },
    "B200": {  # OCI B200
        "slurm_extra_args": "--gres=gpu:4",
        "set_segment": False,
        "lock_freq_graphics_mhz": None,
        "lock_freq_memory_mhz": None,
    },
    "B300": {  # OCI B300
        "slurm_extra_args": "--gres=gpu:4",
        "set_segment": False,
        "lock_freq_graphics_mhz": None,
        "lock_freq_memory_mhz": None,
    },
}
Expand All @@ -67,7 +60,18 @@ def get_slurm_account() -> str:

@staticmethod
def get_slurm_job_name() -> str:
    """Get the SLURM job name in the form ``{SLURM_ACCOUNT}-{base}``.

    Example: ``myaccount-unified.benchmark``.

    ``base`` is read from the ``SLURM_JOB_BASE_NAME`` environment variable
    and defaults to ``unified.benchmark`` when the variable is unset or
    empty.

    Returns:
        ``"{account}-{base}"`` when ``EnvManager.get_slurm_account()``
        returns a non-empty value that does not start with ``"<"``
        (treated as an unfilled placeholder); otherwise just ``base``.
    """
    account = EnvManager.get_slurm_account()
    # `or` also covers an exported-but-empty variable, which would otherwise
    # produce an empty job name or a dangling "account-" prefix.
    base = os.getenv("SLURM_JOB_BASE_NAME") or "unified.benchmark"

    # Only use account as prefix if it's set and not a placeholder
    if account and not account.startswith("<"):
        return f"{account}-{base}"
    return base

@staticmethod
def get_slurm_set_segment() -> bool:
Expand Down