diff --git a/docs/source/overview.parallelisation.rst b/docs/source/overview.parallelisation.rst
index f0388195..d9f879a8 100644
--- a/docs/source/overview.parallelisation.rst
+++ b/docs/source/overview.parallelisation.rst
@@ -68,7 +68,10 @@ created using `multiprocessing.pool.Pool
 The number of threads is controlled by an integer, passed in to the
 function as an optional ``num_python_subprocesses`` argument, or stored
 in the env var ``WFL_NUM_PYTHON_SUBPROCESSES``. The script should be
-started with a normal run of the python executable.
+started with a normal run of the python executable. Setting
+the ``WFL_TORCH_N_GPUS`` env var to the number of GPUs
+causes ``wfl`` to call ``torch.cuda.set_device()`` in each subprocess,
+ensuring that each subprocess is assigned a GPU distinct from the others.

 ========================================
diff --git a/tests/local_scripts/complete_pytest.tin b/tests/local_scripts/complete_pytest.tin
index 2d1f7060..c1f98f65 100755
--- a/tests/local_scripts/complete_pytest.tin
+++ b/tests/local_scripts/complete_pytest.tin
@@ -2,17 +2,40 @@
 module purge
 # module load compiler/gnu python/system python_extras/quippy lapack/mkl
-module load compiler/gnu python python_extras/quippy lapack/mkl
+module load compiler/gnu python python_extras/structure python_extras/quippy lapack/mkl
 # for wfl dependencies
 module load python_extras/wif
 module load python_extras/torch/cpu

+if [ ! -z "$WFL_PYTEST_POST_MODULE_COMMANDS" ]; then
+    echo "Using WFL_PYTEST_POST_MODULE_COMMANDS '$WFL_PYTEST_POST_MODULE_COMMANDS'" 1>&2
+    eval $WFL_PYTEST_POST_MODULE_COMMANDS
+else
+    echo "Using no WFL_PYTEST_POST_MODULE_COMMANDS" 1>&2
+fi
+
 if [ -z "$WFL_PYTEST_EXPYRE_INFO" ]; then
     echo "To override partition used, set WFL_PYTEST_EXPYRE_INFO='{\"resources\" : {\"partitions\": \"DESIRED_PARTITION\"}}'" 1>&2
 fi
+
+WFL_PYTEST_EXPYRE_INFO=$(
+cat << EOF | python3
+import json, os
+i = {"pre_cmds": ["module purge",
+     "module load compiler/gnu lapack/mkl python python_extras/structure python_extras/quippy python_extras/wif dft/vasp dft/pwscf",
+     "module list"]}
+ienv = json.loads(os.environ.get("WFL_PYTEST_EXPYRE_INFO", "{}"))
+i.update(ienv)
+print(json.dumps(i))
+EOF
+)
+export WFL_PYTEST_EXPYRE_INFO
+echo "Using WFL_PYTEST_EXPYRE_INFO '$WFL_PYTEST_EXPYRE_INFO'" 1>&2

 if [ ! 
-z $WFL_PYTHONPATH_EXTRA ]; then + echo "Adding WFL_PYTHONPATH_EXTRA '$WFL_PYTHONPATH_EXTRA'" 1>&2 export PYTHONPATH=${WFL_PYTHONPATH_EXTRA}:${PYTHONPATH} +else + echo "Adding no WFL_PYTHONPATH_EXTRA" 1>&2 fi export JULIA_PROJECT=${PWD}/tests/assets/julia @@ -29,7 +52,7 @@ echo "" >> complete_pytest.tin.out # buildcell export WFL_PYTEST_BUILDCELL=$HOME/src/work/AIRSS/airss-0.9.1/src/buildcell/src/buildcell # VASP -module load dft/vasp +module load dft/vasp/serial export ASE_VASP_COMMAND=vasp.serial export ASE_VASP_COMMAND_GAMMA=vasp.gamma.serial export PYTEST_VASP_POTCAR_DIR=$VASP_PATH/pot/rev_54/PBE @@ -37,6 +60,8 @@ export PYTEST_VASP_POTCAR_DIR=$VASP_PATH/pot/rev_54/PBE module load dft/pwscf # no ORCA +module list + export OPENBLAS_NUM_THREADS=1 export MKL_NUM_THREADS=1 # required for descriptor calc to not hang @@ -70,7 +95,7 @@ l=`egrep '^=.*(passed|failed|skipped|xfailed|error).* in ' complete_pytest.tin.o echo "summary line $l" lp=$( echo $l | sed -E -e 's/ in .*//' -e 's/\s*,\s*/\n/g' ) -declare -A expected_n=( ["passed"]="177" ["skipped"]="21" ["warnings"]=823 ["xfailed"]=2 ["xpassed"]=1 ) +declare -A expected_n=( ["passed"]="188" ["skipped"]="26" ["warnings"]=1068 ["xfailed"]=1 ) IFS=$'\n' t_stat=0 for out in $lp; do diff --git a/tests/test_autoparallelize.py b/tests/test_autoparallelize.py index 76747794..f2586c8f 100644 --- a/tests/test_autoparallelize.py +++ b/tests/test_autoparallelize.py @@ -1,4 +1,5 @@ import pytest +import os import time import numpy as np @@ -12,6 +13,12 @@ from wfl.calculators import generic from wfl.autoparallelize import AutoparaInfo +try: + import torch + from mace.calculators.foundations_models import mace_mp +except ImportError: + torch = None + def test_empty_iterator(tmp_path): co = buildcell.buildcell(range(0), OutputSpec(tmp_path / 'dummy.xyz'), buildcell_cmd='dummy', buildcell_input='dummy') @@ -35,21 +42,71 @@ def test_autopara_info_dict(): def test_pool_speedup(): np.random.seed(5) + rng = np.random.default_rng(5) ats = [] nconf = 60 + at_prim = Atoms('Al', cell=[1, 1, 1], pbc=[True] * 3) for _ in range(nconf): - ats.append(Atoms(['Al'] * nconf, scaled_positions=np.random.uniform(size=(nconf, 3)), cell=[10, 10, 10], pbc=[True] * 3)) + ats.append(at_prim * (4, 4, 4)) + ats[-1].rattle(rng=rng) t0 = time.time() - co = generic.calculate(ConfigSet(ats), OutputSpec(), EMT(), output_prefix="_auto_", autopara_info=AutoparaInfo(num_python_subprocesses=1)) + co = generic.calculate(ConfigSet(ats), OutputSpec(), EMT(), output_prefix="_auto_", + autopara_info=AutoparaInfo(num_python_subprocesses=1, + num_inputs_per_python_subprocess=30)) dt_1 = time.time() - t0 t0 = time.time() - co = generic.calculate(ConfigSet(ats), OutputSpec(), EMT(), output_prefix="_auto_", autopara_info=AutoparaInfo(num_python_subprocesses=2)) + co = generic.calculate(ConfigSet(ats), OutputSpec(), EMT(), output_prefix="_auto_", + autopara_info=AutoparaInfo(num_python_subprocesses=2, + num_inputs_per_python_subprocess=30)) dt_2 = time.time() - t0 print("time ratio", dt_2 / dt_1) - assert dt_2 < dt_1 * (2/3) + assert dt_2 / dt_1 < 0.75 + + +@pytest.mark.skipif(torch is None or not torch.cuda.is_available() or os.environ.get("WFL_TORCH_N_GPUS") is None, reason="No torch CUDA devices available, or WFL_TORCH_N_GPUS isn't set") +@pytest.mark.perf +def test_pool_speedup_GPU(monkeypatch): + np.random.seed(5) + + rng = np.random.default_rng(5) + ats = [] + nconf = 60 + at_prim = Atoms('Al', cell=[1, 1, 1], pbc=[True] * 3) + for _ in range(nconf): + ats.append(at_prim * (5, 5, 5)) + 
ats[-1].rattle(rng=rng) + + calc = (mace_mp, ["small-omat-0"], {"device": "cuda"}) + + req_n_gpus = os.environ["WFL_TORCH_N_GPUS"] + if len(req_n_gpus) == 0: + req_n_gpus = str(len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))) + + if "WFL_TORCH_N_GPUS" in os.environ: + monkeypatch.delenv("WFL_TORCH_N_GPUS") + + t0 = time.time() + co = generic.calculate(ConfigSet(ats), OutputSpec(), calc, output_prefix="_auto_", + autopara_info=AutoparaInfo(num_python_subprocesses=1, + num_inputs_per_python_subprocess=30)) + dt_1 = time.time() - t0 + + monkeypatch.setenv("WFL_TORCH_N_GPUS", req_n_gpus) + + t0 = time.time() + co = generic.calculate(ConfigSet(ats), OutputSpec(), calc, output_prefix="_auto_", + autopara_info=AutoparaInfo(num_python_subprocesses=2, + num_inputs_per_python_subprocess=30)) + dt_2 = time.time() - t0 + + monkeypatch.delenv("WFL_TORCH_N_GPUS") + + print("time ratio", dt_2 / dt_1) + assert dt_2 / dt_1 < 0.75 + def test_outputspec_overwrite(tmp_path): with open(tmp_path / "ats.xyz", "w") as fout: diff --git a/tests/test_clean_dir.py b/tests/test_clean_dir.py new file mode 100644 index 00000000..71882bbd --- /dev/null +++ b/tests/test_clean_dir.py @@ -0,0 +1,64 @@ +from wfl.calculators.utils import clean_rundir + +# def clean_rundir(rundir, keep_files, default_keep_files, calculation_succeeded): + +all_files = ["a", "aa", "b", "c", "d"] +default_keep_files = ["a*", "b"] +actual_default_keep_files = ["a", "aa", "b"] + +def create_files(dir): + for filename in all_files: + with open(dir / filename, "w") as fout: + fout.write("content\n") + +def check_dir(dir, files): + if files is None: + # even path doesn't exist + assert not dir.is_dir() + return + + files = set(files) + + # all expected files are present + for file in files: + assert (dir / file).is_file() + # all present files are expected + for file in dir.iterdir(): + assert file.name in files + +def test_clean_rundir(tmp_path): + # keep True + # keep all files regardless of success + for succ, files in [(True, all_files), (False, all_files)]: + p = tmp_path / f"True_{succ}" + p.mkdir() + create_files(p) + clean_rundir(p, True, default_keep_files, calculation_succeeded=succ) + check_dir(p, files) + + # keep False + # succeeded means keep nothing, failed means keep default + for succ, files in [(True, None), (False, actual_default_keep_files)]: + p = tmp_path / f"False_{succ}" + p.mkdir() + create_files(p) + clean_rundir(p, False, default_keep_files, calculation_succeeded=succ) + check_dir(p, files) + + # keep subset of default + # succeeded means keep subset, failed means keep default + for succ, files in [(True, ["a"]), (False, actual_default_keep_files)]: + p = tmp_path / f"a_{succ}" + p.mkdir() + create_files(p) + clean_rundir(p, ["a"], default_keep_files, calculation_succeeded=succ) + check_dir(p, files) + + # keep different set from default + # succeeded means keep set, failed means keep union of default and set + for succ, files in [(True, ["a", "c"]), (False, actual_default_keep_files + ["a", "c"])]: + p = tmp_path / f"ac_{succ}" + p.mkdir() + create_files(p) + clean_rundir(p, ["a", "c"], default_keep_files, calculation_succeeded=succ) + check_dir(p, files) diff --git a/tests/test_md.py b/tests/test_md.py index ac2385b5..15860fe3 100644 --- a/tests/test_md.py +++ b/tests/test_md.py @@ -20,9 +20,9 @@ from wfl.generate.md.abort import AbortOnCollision, AbortOnLowEnergy try: - from wif.Langevin_BAOAB import Langevin_BAOAB + from ase.md.langevinbaoab import LangevinBAOAB except ImportError: - Langevin_BAOAB = None + 
LangevinBAOAB = None def select_every_10_steps_for_tests_during(at): return at.info.get("MD_step", 1) % 10 == 0 @@ -153,14 +153,14 @@ def test_NPT_Berendsen(cu_slab): assert np.allclose(atoms_traj[0].cell, atoms_traj[-1].cell * cell_f) -@pytest.mark.skipif(Langevin_BAOAB is None, reason="No Langevin_BAOAB available") -def test_NPT_Langevin_BAOAB(cu_slab): +@pytest.mark.skipif(LangevinBAOAB is None, reason="No LangevinBAOAB available") +def test_NPT_LangevinBAOAB(cu_slab): calc = EMT() inputs = ConfigSet(cu_slab) outputs = OutputSpec() - atoms_traj = md.md(inputs, outputs, calculator=calc, integrator="Langevin_BAOAB", steps=300, dt=1.0, + atoms_traj = md.md(inputs, outputs, calculator=calc, integrator="LangevinBAOAB", steps=300, dt=1.0, temperature=500.0, temperature_tau=100/fs, pressure=0.0, rng=np.random.default_rng(1)) @@ -176,14 +176,14 @@ def test_NPT_Langevin_BAOAB(cu_slab): assert np.allclose(atoms_traj[0].cell, atoms_traj[-1].cell * cell_f) -@pytest.mark.skipif(Langevin_BAOAB is None, reason="No Langevin_BAOAB available") -def test_NPT_Langevin_BAOAB_hydro_F(cu_slab): +@pytest.mark.skipif(LangevinBAOAB is None, reason="No LangevinBAOAB available") +def test_NPT_LangevinBAOAB_hydro_F(cu_slab): calc = EMT() inputs = ConfigSet(cu_slab) outputs = OutputSpec() - atoms_traj = md.md(inputs, outputs, calculator=calc, integrator="Langevin_BAOAB", steps=300, dt=1.0, + atoms_traj = md.md(inputs, outputs, calculator=calc, integrator="LangevinBAOAB", steps=300, dt=1.0, temperature=500.0, temperature_tau=100/fs, pressure=0.0, hydrostatic=False, rng=np.random.default_rng(1)) diff --git a/tests/test_phonopy.py b/tests/test_phonopy.py index 7c7b3db0..9643db90 100644 --- a/tests/test_phonopy.py +++ b/tests/test_phonopy.py @@ -7,6 +7,10 @@ from wfl.configset import ConfigSet, OutputSpec from wfl.generate.phonopy import phonopy +try: + import phono3py +except ImportError: + phono3py = None def test_phonopy(tmp_path): @@ -33,6 +37,7 @@ def test_phonopy(tmp_path): for v in at.positions[1:]: assert min(np.linalg.norm(sc.positions[1:] - v, axis=1)) < 1.0e-7 +@pytest.mark.skipif(phono3py is None, reason="No phono3py module") def test_phono3py(tmp_path): at0 = Atoms(numbers=[29], cell = [[0, 2, 2], [2, 0, 2], [2, 2, 0]], positions = [[0, 0, 0]], pbc = [True]*3) at1 = Atoms(numbers=[29], cell = [[0, 1.9, 1.9], [1.9, 0, 1.9], [1.9, 1.9, 0]], positions = [[0, 0, 0]], pbc = [True]*3) @@ -62,6 +67,7 @@ def test_phono3py(tmp_path): assert sum([at.info["config_type"] == "phonon_cubic_1" for at in pert]) == 13*2 +@pytest.mark.skipif(phono3py is None, reason="No phono3py module") def test_phono3py_same_supercell(tmp_path): at0 = Atoms(numbers=[29], cell = [[0, 2, 2], [2, 0, 2], [2, 2, 0]], positions = [[0, 0, 0]], pbc = [True]*3) at1 = Atoms(numbers=[29], cell = [[0, 1.9, 1.9], [1.9, 0, 1.9], [1.9, 1.9, 0]], positions = [[0, 0, 0]], pbc = [True]*3) diff --git a/tests/test_remote_run.py b/tests/test_remote_run.py index 6c68cadf..88fce875 100644 --- a/tests/test_remote_run.py +++ b/tests/test_remote_run.py @@ -69,7 +69,7 @@ def test_vasp_fail(tmp_path, expyre_systems, monkeypatch, remoteinfo_env): def do_vasp_fail(tmp_path, sys_name, monkeypatch, remoteinfo_env): ri = {'sys_name': sys_name, 'job_name': 'pytest_vasp_'+sys_name, - 'env_vars' : ['ASE_VASP_COMMAND=NONE', 'ASE_VASP_COMMAND_GAMMA=NONE'], + 'env_vars' : ['ASE_VASP_COMMAND=NO_VASP_FAIL', 'ASE_VASP_COMMAND_GAMMA=NO_VASP_FAIL'], 'input_files' : ['POTCARs'], 'resources': {'max_time': '5m', 'num_nodes': 1}, 'num_inputs_per_queued_job': 1, 'check_interval': 10} 
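Note: as a usage sketch for the GPU parallelisation changes above (the docs addition and test_pool_speedup_GPU), the snippet below shows how an autoparallelized calculation is expected to be spread over two GPUs. It assumes torch with at least two CUDA devices and the mace package are installed; the model name, system sizes and chunk sizes are copied from the test and are not requirements:

    import os
    # per the wfl/autoparallelize/pool.py hunk below, WFL_TORCH_N_GPUS should be set
    # before wfl.autoparallelize is imported so the forkserver start method is selected
    os.environ["WFL_TORCH_N_GPUS"] = "2"

    import numpy as np
    from ase.atoms import Atoms
    from mace.calculators.foundations_models import mace_mp

    from wfl.configset import ConfigSet, OutputSpec
    from wfl.calculators import generic
    from wfl.autoparallelize import AutoparaInfo

    rng = np.random.default_rng(5)
    at_prim = Atoms('Al', cell=[1, 1, 1], pbc=[True] * 3)
    ats = []
    for _ in range(60):
        ats.append(at_prim * (5, 5, 5))
        ats[-1].rattle(rng=rng)

    # calculator as a (constructor, args, kwargs) tuple, so each python subprocess
    # builds its own torch model on the GPU assigned to it via torch.cuda.set_device()
    calc = (mace_mp, ["small-omat-0"], {"device": "cuda"})

    co = generic.calculate(ConfigSet(ats), OutputSpec(), calc, output_prefix="_auto_",
                           autopara_info=AutoparaInfo(num_python_subprocesses=2,
                                                      num_inputs_per_python_subprocess=30))

With WFL_TORCH_N_GPUS unset, the same call still runs, but every subprocess uses the default CUDA device.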
diff --git a/wfl/__version__.py b/wfl/__version__.py index 334b8995..a8d4557d 100644 --- a/wfl/__version__.py +++ b/wfl/__version__.py @@ -1 +1 @@ -__version__ = "0.3.4" +__version__ = "0.3.5" diff --git a/wfl/autoparallelize/pool.py b/wfl/autoparallelize/pool.py index 7e2085ec..d964ce67 100644 --- a/wfl/autoparallelize/pool.py +++ b/wfl/autoparallelize/pool.py @@ -16,7 +16,25 @@ # except ModuleNotFoundError: # pass +# https://docs.pytorch.org/docs/stable/notes/multiprocessing.html#poison-fork-in-multiprocessing +# But, only use forkserver if needed because it has a lot of overhead +try: + import torch +except: + torch = None +if os.environ.get("WFL_TORCH_N_GPUS") is not None: + if not torch: + raise RuntimeError(f"Got WFL_TORCH_N_GPUS '{os.environ['WFL_TORCH_N_GPUS']}' but torch module is not available") + try: + import multiprocessing + multiprocessing.set_start_method('forkserver') + except RuntimeError as exc: + # ignore complains about setting start method more than once + pass from multiprocessing.pool import Pool +# to help keep track of distinct GPU for each python subprocess +# https://stackoverflow.com/questions/53422761/distributing-jobs-evenly-across-multiple-gpus-with-multiprocessing-pool +from multiprocessing import Queue, Manager from wfl.configset import ConfigSet from wfl.autoparallelize.mpipool_support import wfl_mpipool @@ -24,7 +42,7 @@ from .utils import grouper, items_inputs_generator, set_autopara_per_item_info -def _wrapped_autopara_wrappable(op, iterable_arg, inherited_per_item_info, args, kwargs, item_inputs): +def _wrapped_autopara_wrappable(op, iterable_arg, inherited_per_item_info, args, kwargs, gpu_id_queue, item_inputs): """Wrap an operation to be run in parallel by autoparallelize Parameters: @@ -40,6 +58,8 @@ def _wrapped_autopara_wrappable(op, iterable_arg, inherited_per_item_info, args, list of positional args kwargs: dict dict of keyword args + gpu_id_queue: Queue + queue to ensure that each subprocess gets a unique GPU item_inputs: iterable(4-tuples) One or more 4-tuples. (item, item_i, label, item_rng). 
item is passed to function in iterable_arg, item_i is its number in the overall list, label is a quantity to be passed back with the @@ -49,27 +69,39 @@ def _wrapped_autopara_wrappable(op, iterable_arg, inherited_per_item_info, args, ------- list of 2-tuples containing, the output of each function call, together with the corresponding label (3rd) field of item_inputs """ - for item_input in item_inputs: - assert len(item_input) == 4 + if gpu_id_queue is not None: + gpu_id = gpu_id_queue.get() + else: + gpu_id = None - # item_inputs is iterable of 4-tuples - item_list = [item_input[0] for item_input in item_inputs] - item_i_list = [item_input[1] for item_input in item_inputs] - label_list = [item_input[2] for item_input in item_inputs] - rng_list = [item_input[3] for item_input in item_inputs] + try: + for item_input in item_inputs: + assert len(item_input) == 4 - set_autopara_per_item_info(kwargs, op, inherited_per_item_info, rng_list, item_i_list) + if gpu_id is not None: + torch.cuda.set_device(gpu_id) - if isinstance(iterable_arg, int): - u_args = args[0:iterable_arg] + (item_list,) + args[iterable_arg:] - else: - u_args = args - if iterable_arg is not None: - kwargs[iterable_arg] = item_list + # item_inputs is iterable of 4-tuples + item_list = [item_input[0] for item_input in item_inputs] + item_i_list = [item_input[1] for item_input in item_inputs] + label_list = [item_input[2] for item_input in item_inputs] + rng_list = [item_input[3] for item_input in item_inputs] + + set_autopara_per_item_info(kwargs, op, inherited_per_item_info, rng_list, item_i_list) - outputs = op(*u_args, **kwargs) - if outputs is None: - outputs = [None] * len(item_list) + if isinstance(iterable_arg, int): + u_args = args[0:iterable_arg] + (item_list,) + args[iterable_arg:] + else: + u_args = args + if iterable_arg is not None: + kwargs[iterable_arg] = item_list + + outputs = op(*u_args, **kwargs) + if outputs is None: + outputs = [None] * len(item_list) + finally: + if gpu_id is not None: + gpu_id_queue.put(gpu_id) return zip(outputs, label_list) @@ -132,7 +164,7 @@ def do_in_pool(num_python_subprocesses=None, num_inputs_per_python_subprocess=1, f'always uses all MPI processes {wfl_mpipool.size}') if initializer[0] is not None: # generate a task for each mpi process that will call initializer with positional initargs - _ = wfl_mpipool.map(functools.partial(_wrapped_autopara_wrappable, initializer[0], None, None, initializer[1], {}), + _ = wfl_mpipool.map(functools.partial(_wrapped_autopara_wrappable, initializer[0], None, None, initializer[1], {}, None), grouper(1, ((None, None, None, None) for i in range(wfl_mpipool.size)))) pool = wfl_mpipool else: @@ -142,30 +174,37 @@ def do_in_pool(num_python_subprocesses=None, num_inputs_per_python_subprocess=1, initializer_args = {} pool = Pool(num_python_subprocesses, **initializer_args) - if wfl_mpipool: - map_f = pool.map - else: - map_f = pool.imap - results = map_f(functools.partial(_wrapped_autopara_wrappable, op, iterable_arg, - inherited_per_item_info, args, kwargs), items_inputs) - - if not wfl_mpipool: - # only close pool if it's from multiprocessing.pool - pool.close() - - # always loop over results to trigger lazy imap() - for result_group in results: - if outputspec is not None: - for at, from_input_loc in result_group: - if skip_failed and at is None: - continue - did_no_work = False - outputspec.store(at, from_input_loc) - - if not wfl_mpipool: - # call join pool (prevent pytest-cov deadlock as per 
https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html) - # but only if it's from multiprocessing.pool - pool.join() + with Manager() as manager: + if os.environ.get("WFL_TORCH_N_GPUS") is not None: + gpu_id_queue = manager.Queue() + for subprocess_id in range(num_python_subprocesses): + gpu_id_queue.put(subprocess_id % int(os.environ["WFL_TORCH_N_GPUS"])) + else: + gpu_id_queue = None + if wfl_mpipool: + map_f = pool.map + else: + map_f = pool.imap + results = map_f(functools.partial(_wrapped_autopara_wrappable, op, iterable_arg, + inherited_per_item_info, args, kwargs, gpu_id_queue), items_inputs) + + if not wfl_mpipool: + # only close pool if it's from multiprocessing.pool + pool.close() + + # always loop over results to trigger lazy imap() + for result_group in results: + if outputspec is not None: + for at, from_input_loc in result_group: + if skip_failed and at is None: + continue + did_no_work = False + outputspec.store(at, from_input_loc) + + if not wfl_mpipool: + # call join pool (prevent pytest-cov deadlock as per https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html) + # but only if it's from multiprocessing.pool + pool.join() else: # do directly: still not trivial because of num_inputs_per_python_subprocess @@ -174,7 +213,7 @@ def do_in_pool(num_python_subprocesses=None, num_inputs_per_python_subprocess=1, # unpickle to better reproduce the behavior of Pool.map() ? for items_inputs_group in items_inputs: result_group = _wrapped_autopara_wrappable(op, iterable_arg, inherited_per_item_info, args, - kwargs, items_inputs_group) + kwargs, None, items_inputs_group) if outputspec is not None: for at, from_input_loc in result_group: diff --git a/wfl/calculators/castep.py b/wfl/calculators/castep.py index 4f61bf53..32a6a8e4 100644 --- a/wfl/calculators/castep.py +++ b/wfl/calculators/castep.py @@ -25,7 +25,7 @@ class Castep(WFLFileIOCalculator, ASE_Castep): what kind of files to keep from the run - True : everything kept - None, False : nothing kept, unless calculation fails - - "default" : only ones needed for NOMAD uploads ('\*.pwo') + - "default" : only ones needed for NOMAD uploads ('\*.{castep,param,cell}') - list(str) : list of file globs to save rundir_prefix: str / Path, default 'run\_CASTEP\_' Run directory name prefix diff --git a/wfl/calculators/orca/__init__.py b/wfl/calculators/orca/__init__.py index c8075348..edfd3511 100644 --- a/wfl/calculators/orca/__init__.py +++ b/wfl/calculators/orca/__init__.py @@ -34,7 +34,7 @@ class ORCA(WFLFileIOCalculator, ASE_ORCA): what kind of files to keep from the run - True : everything kept - None, False : nothing kept, unless calculation fails - - "default" : only ones needed for NOMAD uploads ('\*.pwo') + - "default" : only ones needed for NOMAD uploads ('\*.{inp,out,engrad,xyz,_traj.xyz}') - list(str) : list of file globs to save rundir_prefix: str / Path, default 'ORCA\_' Run directory name prefix diff --git a/wfl/calculators/utils.py b/wfl/calculators/utils.py index 1ca8c6b6..d4fdf2ed 100644 --- a/wfl/calculators/utils.py +++ b/wfl/calculators/utils.py @@ -1,3 +1,5 @@ +from pathlib import Path + from wfl.utils.file_utils import clean_dir def clean_rundir(rundir, keep_files, default_keep_files, calculation_succeeded): @@ -7,19 +9,29 @@ def clean_rundir(rundir, keep_files, default_keep_files, calculation_succeeded): ---------- rundir: str path to run dir - keep_files: 'default' / list(str) / '*' / bool / None + keep_files: 'default' / iterable(file_glob) / '*' / bool / None files to keep, None or False for 
nothing, '*' or True for all default_keep_files: list(str) files to keep if keep_files == 'default' or calculation_succeeded is False + and keep_files is not True calculation_succeeded: bool """ - if not calculation_succeeded: - clean_dir(rundir, set(default_keep_files) | set(keep_files if keep_files else []), force=False) - elif keep_files == 'default': - clean_dir(rundir, default_keep_files, force=False) - elif not keep_files: - clean_dir(rundir, False, force=False) - else: - clean_dir(rundir, keep_files, force=False) + if isinstance(keep_files, str): + if keep_files == 'default': + keep_files = default_keep_files + elif keep_files == '*': + keep_files = [keep_files] + else: + raise ValueError(f"str keep_files can only be 'default' or '*', not '{keep_files}'") + # now keep_files should either be True, evaluate to False, or an iterable + + # calculation failed, default optionally union with keep_files + if not calculation_succeeded: + if not keep_files: + keep_files = default_keep_files + elif keep_files is not True: + keep_files = set(default_keep_files) | set(keep_files) + # else True + clean_dir(rundir, keep_files, force=False) diff --git a/wfl/generate/neb.py b/wfl/generate/neb.py index 1f2be063..400d4c7b 100644 --- a/wfl/generate/neb.py +++ b/wfl/generate/neb.py @@ -89,9 +89,10 @@ def process_step(): # preliminary value final_status = 'unconverged' + converged = False try: - opt.run(fmax=fmax, steps=steps) + converged = opt.run(fmax=fmax, steps=steps) except Exception as exc: # label actual failed optimizations # when this happens, the atomic config somehow ends up with a 6-vector stress, which can't be @@ -109,7 +110,9 @@ def process_step(): for at in traj[0]: at.info['neb_config_type'] = 'neb_initial' - if opt.converged(): + # from value returned by Optimizer.run(), since Optimizer.converged requires + # a gradient as of ASE 3.26 + if converged: final_status = 'converged' for intermed_images in traj[1:-1]: diff --git a/wfl/generate/optimize.py b/wfl/generate/optimize.py index 6af51889..ef150eb6 100644 --- a/wfl/generate/optimize.py +++ b/wfl/generate/optimize.py @@ -159,9 +159,10 @@ def process_step(): # preliminary value final_status = 'unconverged' + converged = False try: - opt.run(fmax=fmax, smax=smax, steps=steps) + converged = opt.run(fmax=fmax, smax=smax, steps=steps) except Exception as exc: # label actual failed optimizations # when this happens, the atomic config somehow ends up with a 6-vector stress, which can't be @@ -182,7 +183,10 @@ def process_step(): # set for first config, to be overwritten if it's also last config traj[0].info['optimize_config_type'] = 'optimize_initial' - if opt.converged(): + # as of 3.26 converged() requires a gradient, but for PreconLBFGS it's not used + # See https://gitlab.com/ase/ase/-/issues/1744 + # use value returned by Optimizer.run() instead + if converged: final_status = 'converged' traj[-1].info['optimize_config_type'] = f'optimize_last_{final_status}' diff --git a/wfl/utils/file_utils.py b/wfl/utils/file_utils.py index 87175938..dbc0570f 100644 --- a/wfl/utils/file_utils.py +++ b/wfl/utils/file_utils.py @@ -9,11 +9,11 @@ def clean_dir(directory, keep_files, force=False): Parameters ---------- directory : directory to be cleaned - keep_files: bool or list(filenames) or str + keep_files: bool or iterable(file_globs) What to keep in rundir when done: - - list(filenames) : ONLY these filenames if they exist - - True or "*" : everything - does nothing - - False or None : remove everything, or anything evaluating to False in if + - True 
: keep everything + - iterable : ONLY files matching these globs if they exist + - evaluates to False : nothing force : bool, default = False fail if directory does not exist @@ -29,21 +29,19 @@ def clean_dir(directory, keep_files, force=False): else: return - # defaults - if keep_files is None: - keep_files = False - elif keep_files == "*": - keep_files = True - elif isinstance(keep_files, str): - keep_files = [keep_files] + if isinstance(keep_files, str): + # common error that will not be caught below + raise ValueError(f"keep_files must not be str '{keep_files}'") # operations - if isinstance(keep_files, bool) and keep_files: + if keep_files is True: + # bool True, keep all return elif not keep_files: - # lets None and anything else evaluating to False + # evaluates to False, keep none shutil.rmtree(directory) - elif isinstance(keep_files, (list, tuple, set)): + else: + # keep_files should be an iterable extended_keep_files = [] for glob_index in keep_files: extended_keep_files.extend(glob(os.path.join(directory, glob_index))) @@ -56,5 +54,3 @@ def clean_dir(directory, keep_files, force=False): os.remove(abs_fn) else: shutil.rmtree(abs_fn) - else: - raise RuntimeError('Got unknown type or value for keep_rundir \'{}\''.format(keep_files))
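Two closing notes with illustrative sketches. First, the per-subprocess GPU assignment added to wfl/autoparallelize/pool.py boils down to the pattern below, shown standalone and without torch (in wfl the id taken from the queue is what gets passed to torch.cuda.set_device()); all names here are illustrative:

    from multiprocessing import Manager, Pool

    def work(args):
        gpu_id_queue, item = args
        gpu_id = gpu_id_queue.get()      # take a GPU id out of the shared queue
        try:
            return f"item {item} handled with GPU {gpu_id}"
        finally:
            gpu_id_queue.put(gpu_id)     # hand the id back for the next task

    if __name__ == "__main__":
        num_python_subprocesses, n_gpus = 4, 2
        with Manager() as manager:
            gpu_id_queue = manager.Queue()
            # pre-load one id per subprocess, cycling over the available GPUs
            for subprocess_id in range(num_python_subprocesses):
                gpu_id_queue.put(subprocess_id % n_gpus)
            with Pool(num_python_subprocesses) as pool:
                print(pool.map(work, [(gpu_id_queue, i) for i in range(8)]))

A manager queue is used (rather than a plain multiprocessing.Queue) because its proxy can be passed to pool workers as an ordinary argument.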
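Second, a self-contained demonstration of the keep_files handling that the reworked clean_rundir (and the new tests/test_clean_dir.py) define. The behaviour table summarises the code above; the run directory and file names are made up for illustration:

    import tempfile
    from pathlib import Path

    from wfl.calculators.utils import clean_rundir

    # keep_files           calculation succeeded       calculation failed
    # True                 keep everything             keep everything
    # False / None         remove the whole rundir     keep default_keep_files
    # "default"            keep default_keep_files     keep default_keep_files
    # iterable of globs    keep matching files         keep union with default_keep_files

    rundir = Path(tempfile.mkdtemp())
    for name in ["job.castep", "job.param", "job.cell", "scratch.tmp"]:
        (rundir / name).write_text("content\n")

    default_keep_files = ["*.castep", "*.param", "*.cell"]

    # a failed run with an extra requested glob keeps the defaults plus the extra match
    clean_rundir(rundir, ["*.tmp"], default_keep_files, calculation_succeeded=False)
    assert sorted(p.name for p in rundir.iterdir()) == ["job.castep", "job.cell", "job.param", "scratch.tmp"]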