diff --git a/.gitignore b/.gitignore
index 62485f1faa..d3df50e3b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,3 +67,4 @@
 build/*
 *.log
 *.xml
+AGENTS.md
diff --git a/Makefile b/Makefile
index 9e40dd07e7..84f4331103 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@
 DEVTOOLS_DIR := devtools
 
 .PHONY: all help clean test test-unittests test-functional test-all \
 	install-all install-ci install-pyrdl install-rmgdb install-autotst install-gcn \
-	install-gcn-cpu install-kinbot install-sella install-xtb install-torchani install-ob \
+	install-gcn-cpu install-kinbot install-sella install-xtb install-crest install-torchani install-ob \
 	lite check-env compile
 
@@ -35,6 +35,7 @@ help:
 	@echo "  install-kinbot    Install KinBot"
 	@echo "  install-sella     Install Sella"
 	@echo "  install-xtb       Install xTB"
+	@echo "  install-crest     Install CREST"
 	@echo "  install-torchani  Install TorchANI"
 	@echo "  install-ob        Install OpenBabel"
 	@echo ""
@@ -96,6 +97,9 @@ install-sella:
 install-xtb:
 	bash $(DEVTOOLS_DIR)/install_xtb.sh
 
+install-crest:
+	bash $(DEVTOOLS_DIR)/install_crest.sh
+
 install-torchani:
 	bash $(DEVTOOLS_DIR)/install_torchani.sh
 
diff --git a/arc/job/adapters/ts/autotst_ts.py b/arc/job/adapters/ts/autotst_ts.py
index bd2aabea0f..2eeffb7743 100644
--- a/arc/job/adapters/ts/autotst_ts.py
+++ b/arc/job/adapters/ts/autotst_ts.py
@@ -20,16 +20,14 @@ from arc.species.species import ARCSpecies, TSGuess, colliding_atoms
 
 HAS_AUTOTST = True
-try:
-    from autotst.reaction import Reaction as AutoTST_Reaction
-except (ImportError, ModuleNotFoundError):
-    HAS_AUTOTST = False
 
 if TYPE_CHECKING:
     from arc.level import Level
 
 AUTOTST_PYTHON = settings['AUTOTST_PYTHON']
+if AUTOTST_PYTHON is None:
+    HAS_AUTOTST = False
 
 logger = get_logger()
 
@@ -218,9 +216,14 @@ def execute_incore(self):
         """
         Execute a job incore.
         """
-        if not HAS_AUTOTST:
-            raise ModuleNotFoundError(f'Could not import AutoTST, make sure it is properly installed.\n'
-                                      f'See {self.url} for more information, or use the Makefile provided with ARC.')
+        # 1) Check that ARC knows *which* Python to use for AutoTST
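+        # A typical value (hypothetical path) would be set in ARC's settings, e.g.:
+        #     AUTOTST_PYTHON = '/home/<user>/miniconda3/envs/tst_env/bin/python'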
+        if not AUTOTST_PYTHON:
+            raise ModuleNotFoundError(
+                "settings['AUTOTST_PYTHON'] is not set. "
+                "ARC cannot run AutoTST as a subprocess without this. "
+                "Set AUTOTST_PYTHON in your ARC settings to the Python executable of your tst_env."
+            )
+
         self._log_job_execution()
         self.initial_time = self.initial_time if self.initial_time else datetime.datetime.now()
 
@@ -234,76 +237,122 @@ def execute_incore(self):
                 charge=rxn.charge,
                 multiplicity=rxn.multiplicity,
             )
+
             reaction_label_fwd = get_autotst_reaction_string(rxn)
-            reaction_label_rev = get_autotst_reaction_string(ARCReaction(r_species=rxn.p_species,
-                                                                         p_species=rxn.r_species,
-                                                                         reactants=rxn.products,
-                                                                         products=rxn.reactants))
+            reaction_label_rev = get_autotst_reaction_string(
+                ARCReaction(
+                    r_species=rxn.p_species,
+                    p_species=rxn.r_species,
+                    reactants=rxn.products,
+                    products=rxn.reactants,
+                )
+            )
 
             i = 0
-            for reaction_label, direction in zip([reaction_label_fwd, reaction_label_rev], ['F', 'R']):
-                # run AutoTST as a subprocess in the desired direction
-                script_path = os.path.join(ARC_PATH, 'arc', 'job', 'adapters', 'scripts', 'autotst_script.py')
-                commands = ['source ~/.bashrc', f'"{AUTOTST_PYTHON}" "{script_path}" "{reaction_label}" "{self.output_path}"']
+            for reaction_label, direction in zip(
+                [reaction_label_fwd, reaction_label_rev],
+                ['F', 'R'],
+            ):
+                script_path = os.path.join(
+                    ARC_PATH, 'arc', 'job', 'adapters', 'scripts', 'autotst_script.py'
+                )
+                # 2) Build the bash command to run tst_env’s Python on the script
+                commands = [
+                    'source ~/.bashrc',
+                    f'"{AUTOTST_PYTHON}" "{script_path}" "{reaction_label}" "{self.output_path}"',
+                ]
                 command = '; '.join(commands)
                 tic = datetime.datetime.now()
-                output = subprocess.run(command, shell=True, executable='/bin/bash')
+                # 3) Capture stdout/stderr so we can diagnose missing AutoTST
+                output = subprocess.run(
+                    command,
+                    shell=True,
+                    executable='/bin/bash',
+                    capture_output=True,
+                    text=True,
+                )
                 tok = datetime.datetime.now() - tic
 
                 if output.returncode:
-                    direction_str = 'forward' if direction == 'F' else 'reverse'
-                    logger.warning(f'AutoTST subprocess did not give a successful return code for {rxn} '
-                                   f'in the {direction_str} direction.\n'
-                                   f'Got return code: {output.returncode}\n'
-                                   f'stdout: {output.stdout}\n'
-                                   f'stderr: {output.stderr}')
+                    stderr = output.stderr or ""
+                    stdout = output.stdout or ""
+
+                    # Special case: autotst itself is missing in tst_env
+                    if 'No module named' in stderr and 'autotst' in stderr:
+                        logger.error(
+                            f"AutoTST subprocess failed for {rxn} because the 'autotst' "
+                            f"package is not importable in the tst_env used by AUTOTST_PYTHON:\n"
+                            f"{stderr}"
+                        )
+                    else:
+                        direction_str = 'forward' if direction == 'F' else 'reverse'
+                        logger.warning(
+                            f'AutoTST subprocess did not give a successful return code for {rxn} '
+                            f'in the {direction_str} direction.\n'
+                            f'Got return code: {output.returncode}\n'
+                            f'stdout: {stdout}\n'
+                            f'stderr: {stderr}'
+                        )
+
+                # 4) Check for the YAML output and add TS guesses as before
                 if os.path.isfile(self.output_path):
                     results = read_yaml_file(path=self.output_path)
                     if results:
                         for result in results:
-                            xyz = xyz_from_data(coords=result['coords'], numbers=result['numbers'])
+                            xyz = xyz_from_data(
+                                coords=result['coords'],
+                                numbers=result['numbers'],
+                            )
                             unique = True
                             for other_tsg in rxn.ts_species.ts_guesses:
-                                if other_tsg.success and almost_equal_coords(xyz, other_tsg.initial_xyz):
+                                if other_tsg.success and almost_equal_coords(
+                                    xyz, other_tsg.initial_xyz
+                                ):
                                     if 'autotst' not in other_tsg.method.lower():
                                         other_tsg.method += ' and AutoTST'
                                     unique = False
                                     break
                             if unique and not colliding_atoms(xyz):
-                                ts_guess = TSGuess(method='AutoTST',
-                                                   method_direction=direction,
-                                                   method_index=i,
-                                                   t0=tic,
-                                                   execution_time=tok,
-                                                   xyz=xyz,
-                                                   success=True,
-                                                   index=len(rxn.ts_species.ts_guesses),
-                                                   )
+                                ts_guess = TSGuess(
+                                    method='AutoTST',
+                                    method_direction=direction,
+                                    method_index=i,
+                                    t0=tic,
+                                    execution_time=tok,
+                                    xyz=xyz,
+                                    success=True,
+                                    index=len(rxn.ts_species.ts_guesses),
+                                )
                                 rxn.ts_species.ts_guesses.append(ts_guess)
-                                save_geo(xyz=xyz,
-                                         path=self.local_path,
-                                         filename=f'AutoTST {direction}',
-                                         format_='xyz',
-                                         comment=f'AutoTST {direction}',
-                                         )
+                                save_geo(
+                                    xyz=xyz,
+                                    path=self.local_path,
+                                    filename=f'AutoTST {direction}',
+                                    format_='xyz',
+                                    comment=f'AutoTST {direction}',
+                                )
                                 i += 1
                 else:
-                    ts_guess = TSGuess(method=f'AutoTST',
-                                       method_direction=direction,
-                                       method_index=i,
-                                       t0=tic,
-                                       execution_time=tok,
-                                       success=False,
-                                       index=len(rxn.ts_species.ts_guesses),
-                                       )
+                    ts_guess = TSGuess(
+                        method='AutoTST',
+                        method_direction=direction,
+                        method_index=i,
+                        t0=tic,
+                        execution_time=tok,
+                        success=False,
+                        index=len(rxn.ts_species.ts_guesses),
+                    )
                     rxn.ts_species.ts_guesses.append(ts_guess)
                     i += 1
 
         if len(self.reactions) < 5:
-            successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'autotst' in tsg.method])
+            successes = len(
+                [tsg for tsg in rxn.ts_species.ts_guesses
+                 if tsg.success and 'autotst' in tsg.method.lower()]
+            )
             if successes:
                 logger.info(f'AutoTST successfully found {successes} TS guesses for {rxn.label}.')
             else:
@@ -311,6 +360,7 @@ def execute_incore(self):
 
         self.final_time = datetime.datetime.now()
 
+
     def execute_queue(self):
         """
         (Execute a job to the server's queue.)
diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py
new file mode 100644
index 0000000000..2d6ce2a46a
--- /dev/null
+++ b/arc/job/adapters/ts/crest.py
@@ -0,0 +1,378 @@
+"""
+Utilities for running CREST within ARC.
+
+Separated from heuristics so CREST can be conditionally imported and reused.
+"""
+
+import os
+import re
+import time
+from typing import List
+
+import numpy as np
+import pandas as pd
+
+from arc.common import get_logger
+from arc.imports import settings, submit_scripts
+from arc.job.local import check_job_status, submit_job
+from arc.species.converter import reorder_xyz_string, str_to_xyz, xyz_to_dmat, xyz_to_str
+
+logger = get_logger()
+
+try:
+    CREST_PATH = settings["CREST_PATH"]
+    CREST_ENV_PATH = settings["CREST_ENV_PATH"]
+    SERVERS = settings["servers"]
+except KeyError:
+    CREST_PATH = None
+    CREST_ENV_PATH = None
+    SERVERS = {}
+
+
+def crest_available() -> bool:
+    """
+    Return whether CREST is configured for use.
+    """
+    return bool(SERVERS.get("local")) and bool(CREST_PATH or CREST_ENV_PATH)
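+
+
+# Example (hypothetical settings): with SERVERS = {'local': {'cluster_soft': 'pbs', 'cpus': 8}}
+# and CREST_PATH = '/usr/local/bin/crest', crest_available() returns True; without a 'local'
+# server entry it returns False no matter which CREST paths are set.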
+ """ + return bool(SERVERS.get("local")) and bool(CREST_PATH or CREST_ENV_PATH) + + +def crest_ts_conformer_search( + xyz_guess: dict, + a_atom: int, + h_atom: int, + b_atom: int, + path: str = "", + xyz_crest_int: int = 0, +) -> str: + """ + Prepare a CREST TS conformer search job: + - Write coords.ref and constraints.inp + - Write a PBS/HTCondor submit script using submit_scripts["local"]["crest"] + - Return the CREST job directory path + """ + path = os.path.join(path, f"crest_{xyz_crest_int}") + os.makedirs(path, exist_ok=True) + + # --- coords.ref --- + symbols = xyz_guess["symbols"] + converted_coords = reorder_xyz_string( + xyz_str=xyz_to_str(xyz_guess), + reverse_atoms=True, + convert_to="bohr", + ) + coords_ref_content = f"$coord\n{converted_coords}\n$end\n" + coords_ref_path = os.path.join(path, "coords.ref") + with open(coords_ref_path, "w") as f: + f.write(coords_ref_content) + + # --- constraints.inp --- + num_atoms = len(symbols) + # CREST uses 1-based indices + a_atom += 1 + h_atom += 1 + b_atom += 1 + + # All atoms not directly involved in A–H–B go into the metadynamics atom list + list_of_atoms_numbers_not_participating_in_reaction = [ + i for i in range(1, num_atoms + 1) if i not in [a_atom, h_atom, b_atom] + ] + + constraints_path = os.path.join(path, "constraints.inp") + with open(constraints_path, "w") as f: + f.write("$constrain\n") + f.write(f" atoms: {a_atom}, {h_atom}, {b_atom}\n") + f.write(" force constant: 0.5\n") + f.write(" reference=coords.ref\n") + f.write(f" distance: {a_atom}, {h_atom}, auto\n") + f.write(f" distance: {h_atom}, {b_atom}, auto\n") + f.write("$metadyn\n") + if list_of_atoms_numbers_not_participating_in_reaction: + f.write( + f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n' + ) + f.write("$end\n") + + # --- build CREST command string --- + # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8 + local_server = SERVERS.get("local", {}) + cpus = int(local_server.get("cpus", 8)) + if CREST_ENV_PATH: + crest_exe = "crest" + else: + crest_exe = CREST_PATH if CREST_PATH is not None else "crest" + + commands = [ + crest_exe, + "coords.ref", + "--cinp constraints.inp", + "--noreftopo", + f'-T {local_server.get("cpus", 8)}', + ] + command = " ".join(commands) + + # --- activation line (optional) --- + activation_line = CREST_ENV_PATH or "" + + if SERVERS.get("local") is not None: + cluster_soft = SERVERS["local"]["cluster_soft"].lower() + + if cluster_soft in ["condor", "htcondor"]: + # HTCondor branch (kept for completeness – you can delete if you don't use it) + sub_job = submit_scripts["local"]["crest"] + format_params = { + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + "memory": int(SERVERS["local"].get("memory", 32.0) * 1024), + } + sub_job = sub_job.format(**format_params) + + with open( + os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w" + ) as f: + f.write(sub_job) + + crest_job = submit_scripts["local"]["crest_job"] + crest_job = crest_job.format( + path=path, + activation_line=activation_line, + commands=command, + ) + + with open(os.path.join(path, "job.sh"), "w") as f: + f.write(crest_job) + os.chmod(os.path.join(path, "job.sh"), 0o700) + + # Pre-create out/err for any status checkers that expect them + for fname in ("out.txt", "err.txt"): + fpath = os.path.join(path, fname) + if not os.path.exists(fpath): + with open(fpath, "w") as f: + f.write("") + os.chmod(fpath, 0o600) + + elif cluster_soft == "pbs": + # PBS branch that matches your 'crest' template above + 
+
+    # --- build CREST command string ---
+    # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8
+    local_server = SERVERS.get("local", {})
+    cpus = int(local_server.get("cpus", 8))
+    if CREST_ENV_PATH:
+        crest_exe = "crest"
+    else:
+        crest_exe = CREST_PATH if CREST_PATH is not None else "crest"
+
+    commands = [
+        crest_exe,
+        "coords.ref",
+        "--cinp constraints.inp",
+        "--noreftopo",
+        f"-T {cpus}",
+    ]
+    command = " ".join(commands)
+
+    # --- activation line (optional) ---
+    activation_line = CREST_ENV_PATH or ""
+
+    if SERVERS.get("local") is not None:
+        cluster_soft = SERVERS["local"]["cluster_soft"].lower()
+
+        if cluster_soft in ["condor", "htcondor"]:
+            # HTCondor branch (kept for completeness; only used when the local scheduler is HTCondor)
+            sub_job = submit_scripts["local"]["crest"]
+            format_params = {
+                "name": f"crest_{xyz_crest_int}",
+                "cpus": cpus,
+                "memory": int(SERVERS["local"].get("memory", 32.0) * 1024),
+            }
+            sub_job = sub_job.format(**format_params)
+
+            with open(
+                os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w"
+            ) as f:
+                f.write(sub_job)
+
+            crest_job = submit_scripts["local"]["crest_job"]
+            crest_job = crest_job.format(
+                path=path,
+                activation_line=activation_line,
+                commands=command,
+            )
+
+            with open(os.path.join(path, "job.sh"), "w") as f:
+                f.write(crest_job)
+            os.chmod(os.path.join(path, "job.sh"), 0o700)
+
+            # Pre-create out/err for any status checkers that expect them
+            for fname in ("out.txt", "err.txt"):
+                fpath = os.path.join(path, fname)
+                if not os.path.exists(fpath):
+                    with open(fpath, "w") as f:
+                        f.write("")
+                    os.chmod(fpath, 0o600)
+
+        elif cluster_soft == "pbs":
+            # PBS branch matching the 'crest' submit template above
+            sub_job = submit_scripts["local"]["crest"]
+            format_params = {
+                "queue": SERVERS["local"].get("queue", "alon_q"),
+                "name": f"crest_{xyz_crest_int}",
+                "cpus": cpus,
+                # 'memory' is in GB for the template: mem={memory}gb
+                "memory": int(
+                    SERVERS["local"].get("memory", 32)
+                    if SERVERS["local"].get("memory", 32) < 60
+                    else 40
+                ),
+                "activation_line": activation_line,
+                "commands": command,
+            }
+            sub_job = sub_job.format(**format_params)
+
+            submit_filename = settings["submit_filenames"]["PBS"]  # usually 'submit.sh'
+            submit_path = os.path.join(path, submit_filename)
+            with open(submit_path, "w") as f:
+                f.write(sub_job)
+            os.chmod(submit_path, 0o700)
+
+        else:
+            raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}")
+
+    return path
+
+
+def submit_crest_jobs(crest_paths: List[str]) -> dict:
+    """
+    Submit CREST jobs to the server.
+
+    Args:
+        crest_paths (List[str]): List of paths to the CREST directories.
+
+    Returns:
+        dict: A dictionary containing job IDs as keys and their statuses as values.
+    """
+    crest_jobs = {}
+    for crest_path in crest_paths:
+        job_status, job_id = submit_job(path=crest_path)
+        logger.info(f"CREST job {job_id} submitted for {crest_path}")
+        crest_jobs[job_id] = {"path": crest_path, "status": job_status}
+    return crest_jobs
+
+
+def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None:
+    """
+    Monitor CREST jobs until they are complete.
+
+    Args:
+        crest_jobs (dict): Dictionary containing job information (job ID, path, and status).
+        check_interval (int): Time interval (in seconds) to wait between status checks
+                              (capped at 100 s between consecutive checks).
+    """
+    while True:
+        all_done = True
+        for job_id, job_info in crest_jobs.items():
+            if job_info["status"] not in ["done", "failed"]:
+                try:
+                    job_info["status"] = check_job_status(job_id)  # Update job status
+                except Exception as e:
+                    logger.error(f"Error checking job status for job {job_id}: {e}")
+                    job_info["status"] = "failed"
+                if job_info["status"] not in ["done", "failed"]:
+                    all_done = False
+        if all_done:
+            break
+        time.sleep(min(check_interval, 100))
+
+
+def process_completed_jobs(crest_jobs: dict) -> list:
+    """
+    Process the completed CREST jobs and update XYZ guesses.
+
+    Args:
+        crest_jobs (dict): Dictionary containing job information.
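+
+    Returns:
+        list: ARC xyz dicts parsed from each completed job's crest_best.xyz file.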
+ """ + xyz_guesses = [] + for job_id, job_info in crest_jobs.items(): + crest_path = job_info["path"] + if job_info["status"] == "done": + crest_best_path = os.path.join(crest_path, "crest_best.xyz") + if os.path.exists(crest_best_path): + with open(crest_best_path, "r") as f: + content = f.read() + xyz_guess = str_to_xyz(content) + xyz_guesses.append(xyz_guess) + else: + logger.error(f"crest_best.xyz not found in {crest_path}") + elif job_info["status"] == "failed": + logger.error(f"CREST job failed for {crest_path}") + + return xyz_guesses + + +def extract_digits(s: str) -> int: + """ + Extract the first integer from a string + + Args: + s (str): The string to extract the integer from + + Returns: + int: The first integer in the string + + """ + return int(re.sub(r"[^\d]", "", s)) + + +def convert_xyz_to_df(xyz: dict) -> pd.DataFrame: + """ + Convert a dictionary of xyz coords to a pandas DataFrame with bond distances + + Args: + xyz (dict): The xyz coordinates of the molecule + + Return: + pd.DataFrame: The xyz coordinates as a pandas DataFrame + + """ + symbols = xyz["symbols"] + symbol_enum = [f"{symbol}{i}" for i, symbol in enumerate(symbols)] + ts_dmat = xyz_to_dmat(xyz) + + return pd.DataFrame(ts_dmat, columns=symbol_enum, index=symbol_enum) + + +def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: + """ + Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it + + Args: + dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols + + Returns: + dict: The hydrogen atom and the two heavy atoms. The keys are 'H', 'A', 'B' + """ + + closest_atoms = {} + for index, row in dataframe.iterrows(): + + row[index] = np.inf + closest = row.nsmallest(2).index.tolist() + closest_atoms[index] = closest + + hydrogen_keys = [key for key in dataframe.index if key.startswith("H")] + condition_occurrences = [] + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + is_heavy_present = any( + atom for atom in atom_neighbours if not atom.startswith("H") + ) + if_hydrogen_present = any( + atom + for atom in atom_neighbours + if atom.startswith("H") and atom != hydrogen_key + ) + + if is_heavy_present and if_hydrogen_present: + # Store the details of this occurrence + condition_occurrences.append( + {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]} + ) + + # Check if the condition was met + if condition_occurrences: + if len(condition_occurrences) > 1: + # Store distances to decide which occurrence to use + occurrence_distances = [] + for occurrence in condition_occurrences: + # Calculate the sum of distances to the two heavy atoms + hydrogen_key = f"{occurrence['H']}" + heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"] + try: + distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() + occurrence_distances.append((occurrence, distances)) + except KeyError as e: + logger.error(f"Error accessing distances for occurrence {occurrence}: {e}") + + # Select the occurrence with the smallest distance + best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] + return { + "H": extract_digits(best_occurrence["H"]), + "A": extract_digits(best_occurrence["A"]), + "B": extract_digits(best_occurrence["B"]), + } + else: + single_occurrence = condition_occurrences[0] + return { + "H": extract_digits(single_occurrence["H"]), + "A": extract_digits(single_occurrence["A"]), + "B": extract_digits(single_occurrence["B"]), + } + else: + + # Check the all the hydrogen atoms, and 
+def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict:
+    """
+    Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it.
+
+    Args:
+        dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols.
+
+    Returns:
+        dict: The hydrogen atom and the two heavy atoms. The keys are 'H', 'A', 'B'.
+    """
+    closest_atoms = {}
+    for index, row in dataframe.iterrows():
+        row[index] = np.inf
+        closest = row.nsmallest(2).index.tolist()
+        closest_atoms[index] = closest
+
+    hydrogen_keys = [key for key in dataframe.index if key.startswith("H")]
+    condition_occurrences = []
+
+    for hydrogen_key in hydrogen_keys:
+        atom_neighbours = closest_atoms[hydrogen_key]
+        is_heavy_present = any(
+            atom for atom in atom_neighbours if not atom.startswith("H")
+        )
+        is_hydrogen_present = any(
+            atom
+            for atom in atom_neighbours
+            if atom.startswith("H") and atom != hydrogen_key
+        )
+
+        if is_heavy_present and is_hydrogen_present:
+            # Store the details of this occurrence
+            condition_occurrences.append(
+                {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]}
+            )
+
+    # Check if the condition was met
+    if condition_occurrences:
+        if len(condition_occurrences) > 1:
+            # Store distances to decide which occurrence to use
+            occurrence_distances = []
+            for occurrence in condition_occurrences:
+                # Calculate the sum of distances to the two heavy atoms
+                hydrogen_key = f"{occurrence['H']}"
+                heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"]
+                try:
+                    distances = dataframe.loc[hydrogen_key, heavy_atoms].sum()
+                    occurrence_distances.append((occurrence, distances))
+                except KeyError as e:
+                    logger.error(f"Error accessing distances for occurrence {occurrence}: {e}")
+
+            # Select the occurrence with the smallest distance
+            best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0]
+            return {
+                "H": extract_digits(best_occurrence["H"]),
+                "A": extract_digits(best_occurrence["A"]),
+                "B": extract_digits(best_occurrence["B"]),
+            }
+        else:
+            single_occurrence = condition_occurrences[0]
+            return {
+                "H": extract_digits(single_occurrence["H"]),
+                "A": extract_digits(single_occurrence["A"]),
+                "B": extract_digits(single_occurrence["B"]),
+            }
+    else:
+        # Fall back: for each hydrogen atom, sum its distances to its two closest
+        # heavy atoms, and pick the hydrogen with the smallest aggregate distance.
+        min_distance = np.inf
+        selected_hydrogen = None
+        selected_heavy_atoms = None
+
+        for hydrogen_key in hydrogen_keys:
+            atom_neighbours = closest_atoms[hydrogen_key]
+            heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")]
+
+            if len(heavy_atoms) < 2:
+                continue
+
+            distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum()
+            if distances < min_distance:
+                min_distance = distances
+                selected_hydrogen = hydrogen_key
+                selected_heavy_atoms = heavy_atoms
+
+        if selected_hydrogen:
+            return {
+                "H": extract_digits(selected_hydrogen),
+                "A": extract_digits(selected_heavy_atoms[0]),
+                "B": extract_digits(selected_heavy_atoms[1]),
+            }
+        else:
+            raise ValueError("No valid hydrogen atom found.")
+
+    return {}
diff --git a/arc/job/adapters/ts/crest_test.py b/arc/job/adapters/ts/crest_test.py
new file mode 100644
index 0000000000..1d5320ad5e
--- /dev/null
+++ b/arc/job/adapters/ts/crest_test.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+
+"""
+Unit tests for arc.job.adapters.ts.crest
+"""
+
+import os
+import tempfile
+import unittest
+
+from arc.species.converter import str_to_xyz
+
+
+class TestCrestAdapter(unittest.TestCase):
+    """
+    Tests for CREST input generation.
+    """
+
+    def setUp(self):
+        self.tmpdir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.tmpdir.cleanup()
+
+    def test_creates_valid_input_files(self):
+        """
+        Ensure CREST inputs are written with expected content/format.
+        """
+        from arc.job.adapters.ts import crest as crest_mod
+
+        xyz = str_to_xyz(
+            """O 0.0 0.0 0.0
+               H 0.0 0.0 0.96
+               H 0.9 0.0 0.0"""
+        )
+
+        backups = {
+            "settings": crest_mod.settings,
+            "submit_scripts": crest_mod.submit_scripts,
+            "CREST_PATH": crest_mod.CREST_PATH,
+            "CREST_ENV_PATH": crest_mod.CREST_ENV_PATH,
+            "SERVERS": crest_mod.SERVERS,
+        }
+
+        try:
+            crest_mod.settings = {"submit_filenames": {"PBS": "submit.sh"}}
+            crest_mod.submit_scripts = {
+                "local": {
+                    "crest": (
+                        "#PBS -q {queue}\n"
+                        "#PBS -N {name}\n"
+                        "#PBS -l select=1:ncpus={cpus}:mem={memory}gb\n"
+                    ),
+                    "crest_job": "{activation_line}\ncd {path}\n{commands}\n",
+                }
+            }
+            crest_mod.CREST_PATH = "/usr/bin/crest"
+            crest_mod.CREST_ENV_PATH = ""
+            crest_mod.SERVERS = {
+                "local": {"cluster_soft": "pbs", "cpus": 4, "memory": 8, "queue": "testq"}
+            }
+
+            crest_dir = crest_mod.crest_ts_conformer_search(
+                xyz_guess=xyz, a_atom=0, h_atom=1, b_atom=2, path=self.tmpdir.name, xyz_crest_int=0
+            )
+
+            coords_path = os.path.join(crest_dir, "coords.ref")
+            constraints_path = os.path.join(crest_dir, "constraints.inp")
+            submit_path = os.path.join(crest_dir, "submit.sh")
+
+            self.assertTrue(os.path.exists(coords_path))
+            self.assertTrue(os.path.exists(constraints_path))
+            self.assertTrue(os.path.exists(submit_path))
+
+            with open(coords_path) as f:
+                coords = f.read().strip().splitlines()
+            self.assertEqual(coords[0].strip(), "$coord")
+            self.assertEqual(coords[-1].strip(), "$end")
+            self.assertEqual(len(coords) - 2, len(xyz["symbols"]))
+
+            with open(constraints_path) as f:
+                constraints = f.read()
+            self.assertIn("atoms: 1, 2, 3", constraints)
+            self.assertIn("force constant: 0.5", constraints)
+            self.assertIn("reference=coords.ref", constraints)
+            self.assertIn("distance: 1, 2, auto", constraints)
+            self.assertIn("distance: 2, 3, auto", constraints)
+            self.assertIn("$metadyn", constraints)
+            self.assertTrue(constraints.strip().endswith("$end"))
+        finally:
+            crest_mod.settings = backups["settings"]
+            crest_mod.submit_scripts = backups["submit_scripts"]
+            crest_mod.CREST_PATH = backups["CREST_PATH"]
+            crest_mod.CREST_ENV_PATH = backups["CREST_ENV_PATH"]
+            crest_mod.SERVERS = backups["SERVERS"]
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py
index aa281542ae..3aacaa3667 100644
--- a/arc/job/adapters/ts/heuristics.py
+++ b/arc/job/adapters/ts/heuristics.py
@@ -14,7 +14,6 @@
   - Think: two H sites on a CH2 element, one being abstracted. On which one in the reactant do we put the abstractor?
     Can/should we try both?
 """
-
 import datetime
 import itertools
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
@@ -23,8 +22,23 @@
 from arc.job.adapter import JobAdapter
 from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family
 from arc.job.factory import register_job_adapter
+from arc.job.adapters.ts.crest import (
+    convert_xyz_to_df,
+    crest_available,
+    crest_ts_conformer_search,
+    get_h_abs_atoms,
+    monitor_crest_jobs,
+    process_completed_jobs,
+    submit_crest_jobs,
+)
 from arc.plotter import save_geo
-from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz
+from arc.species.converter import (
+    compare_zmats,
+    relocate_zmat_dummy_atoms_to_the_end,
+    str_to_xyz,
+    zmat_from_xyz,
+    zmat_to_xyz,
+)
 from arc.mapping.engine import map_two_species
 from arc.molecule.molecule import Molecule
 from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms
@@ -34,7 +48,6 @@
     from arc.level import Level
     from arc.reaction import ARCReaction
 
-
 DIHEDRAL_INCREMENT = 30
 
 logger = get_logger()
@@ -252,19 +265,23 @@ def execute_incore(self):
             if rxn.family == 'H_Abstraction':
                 tsg = TSGuess(method='Heuristics')
                 tsg.tic()
-                xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment)
+                xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment, path=self.local_path)
                 tsg.tok()
-                for method_index, xyz in enumerate(xyzs):
+                for method_index, xyz_entry in enumerate(xyzs):
+                    xyz = xyz_entry.get("xyz")
+                    method_label = xyz_entry.get("method", "Heuristics")
+                    if xyz is None:
+                        continue
                     unique = True
                     for other_tsg in rxn.ts_species.ts_guesses:
                         if almost_equal_coords(xyz, other_tsg.initial_xyz):
-                            if 'heuristics' not in other_tsg.method.lower():
-                                other_tsg.method += ' and Heuristics'
+                            if method_label.lower() not in other_tsg.method.lower():
+                                other_tsg.method += f' and {method_label}'
                             unique = False
                             break
                     if unique:
-                        ts_guess = TSGuess(method='Heuristics',
+                        ts_guess = TSGuess(method=method_label,
                                            index=len(rxn.ts_species.ts_guesses),
                                            method_index=method_index,
                                            t0=tsg.t0,
@@ -276,15 +293,18 @@ def execute_incore(self):
                         rxn.ts_species.ts_guesses.append(ts_guess)
                         save_geo(xyz=xyz,
                                  path=self.local_path,
-                                 filename=f'Heuristics_{method_index}',
+                                 filename=f'{method_label}_{method_index}',
                                  format_='xyz',
-                                 comment=f'Heuristics {method_index}, family: {rxn.family}',
+                                 comment=f'{method_label} {method_index}, family: {rxn.family}',
                                  )
 
         if len(self.reactions) < 5:
-            successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'heuristics' in tsg.method])
+            successes = [tsg for tsg in rxn.ts_species.ts_guesses if tsg.success]
+            crest_successes = len([tsg for tsg in successes if 'crest' in tsg.method.lower()])
             if successes:
-                logger.info(f'Heuristics successfully found {successes} TS guesses for {rxn.label}.')
+                logger.info(f'Heuristics successfully found {len(successes)} TS guesses for {rxn.label}.')
+                if crest_successes:
+                    logger.info(f'CREST contributed {crest_successes} TS guesses for {rxn.label}.')
             else:
                 logger.info(f'Heuristics did not find any successful TS guesses for {rxn.label}.')
 
@@ -836,6 +856,7 @@ def h_abstraction(reaction: 'ARCReaction',
                   r2_stretch: float = 1.2,
                   a2: float = 180,
                   dihedral_increment: Optional[int] = None,
+                  path: str = ""
                   ) -> List[dict]:
     """
     Generate TS guesses for reactions of the RMG ``H_Abstraction`` family.
@@ -850,9 +871,12 @@ def h_abstraction(reaction: 'ARCReaction',
         dihedral_increment (int, optional): The dihedral increment to use for B-H-A-C and D-B-H-C dihedral scans.
+        path (str, optional): The local job directory in which CREST conformer searches are set up.
 
     Returns: List[dict]
-        Entries are Cartesian coordinates of TS guesses for all reactions.
+        Entries hold Cartesian coordinates of TS guesses and the generating method label.
     """
     xyz_guesses = list()
+    crest_paths = list()
+    all_zmats = list()
+    use_crest = crest_available()
     dihedral_increment = dihedral_increment or DIHEDRAL_INCREMENT
     reactants_reversed, products_reversed = are_h_abs_wells_reversed(rxn=reaction,
                                                                      product_dict=reaction.product_dicts[0])
     for product_dict in reaction.product_dicts:
@@ -898,7 +922,7 @@ def h_abstraction(reaction: 'ARCReaction',
                 d2_d3_product = [(None, None)]
 
             zmats = list()
-            for d2, d3 in d2_d3_product:
+            for iteration, (d2, d3) in enumerate(d2_d3_product):
                 xyz_guess = None
                 try:
                     xyz_guess = combine_coordinates_with_redundant_atoms(
@@ -929,8 +953,55 @@ def h_abstraction(reaction: 'ARCReaction',
                 else:
                     # This TS is unique, and has no atom collisions.
                     zmats.append(zmat_guess)
-                    xyz_guesses.append(xyz_guess)
+                    all_zmats.append(zmat_guess)
+                    xyz_guesses.append({"xyz": xyz_guess, "method": "Heuristics"})
+
+                    if use_crest:
+                        xyz_guess_crest = xyz_guess.copy()
+                        if isinstance(xyz_guess_crest, dict):
+                            df_dmat = convert_xyz_to_df(xyz_guess_crest)
+                        elif isinstance(xyz_guess_crest, str):
+                            xyz = str_to_xyz(xyz_guess_crest)
+                            df_dmat = convert_xyz_to_df(xyz)
+                        elif isinstance(xyz_guess_crest, list):
+                            xyz_temp = "\n".join(xyz_guess_crest)
+                            xyz_parsed = str_to_xyz(xyz_temp)
+                            df_dmat = convert_xyz_to_df(xyz_parsed)
+                        else:
+                            df_dmat = None
+
+                        if df_dmat is not None:
+                            try:
+                                h_abs_atoms_dict = get_h_abs_atoms(df_dmat)
+                                crest_path = crest_ts_conformer_search(
+                                    xyz_guess_crest,
+                                    h_abs_atoms_dict["A"],
+                                    h_abs_atoms_dict["H"],
+                                    h_abs_atoms_dict["B"],
+                                    path=path,
+                                    xyz_crest_int=iteration,
+                                )
+                                crest_paths.append(crest_path)
+                            except (ValueError, KeyError) as e:
+                                logger.error(f"Could not determine the H abstraction atoms, got:\n{e}")
+
+    if use_crest and crest_paths:
+        crest_jobs = submit_crest_jobs(crest_paths)
+        monitor_crest_jobs(crest_jobs)  # Keep checking job statuses until complete
+        xyz_guesses_crest = process_completed_jobs(crest_jobs)
+        for xyz_guess_crest in xyz_guesses_crest:
+            zmat_guess = zmat_from_xyz(xyz_guess_crest, is_ts=True)
+            is_unique = True  # Assume the current Z-matrix is unique
+            for existing_zmat_guess in all_zmats:
+                if compare_zmats(existing_zmat_guess, zmat_guess):
+                    is_unique = False  # Found a match, mark as not unique
+                    break  # Exit this inner loop only
+            if is_unique:
+                # If no match was found, append to lists
+                all_zmats.append(zmat_guess)
+                xyz_guesses.append({"xyz": xyz_guess_crest, "method": "CREST"})
 
     return xyz_guesses
 
 
-register_job_adapter('heuristics', HeuristicsAdapter)
+register_job_adapter("heuristics", HeuristicsAdapter)
diff --git a/arc/settings/settings.py b/arc/settings/settings.py
index 8be7b76cd7..50f1418504 100644
--- a/arc/settings/settings.py
+++ b/arc/settings/settings.py
@@ -8,6 +8,8 @@
 import os
 import string
 import sys
+import re
+import shutil
 
 # Users should update the following server dictionary.
 # Instructions for RSA key generation can be found here:
@@ -369,3 +370,127 @@ def find_executable(env_name, executable_name='python'):
     if path and os.path.isdir(path):
         RMG_DB_PATH = path
         break
+
+
+# Candidate binary paths for CREST in common conda-style environment locations
+crest_path1 = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(sys.executable))),
+                           'crest_env', 'bin', 'crest')
+crest_path2 = os.path.join(home, 'anaconda3', 'envs', 'crest_env', 'bin', 'crest')
+crest_path3 = os.path.join(home, 'miniconda3', 'envs', 'crest_env', 'bin', 'crest')
+crest_path4 = os.path.join(home, '.conda', 'envs', 'crest_env', 'bin', 'crest')
+crest_path5 = os.path.join('/Local/ce_dana', 'anaconda3', 'envs', 'crest_env', 'bin', 'crest')
+crest_path6 = os.path.join(home, 'mambaforge', 'envs', 'crest_env', 'bin', 'crest')
+crest_path7 = os.path.join(home, 'micromamba', 'envs', 'crest_env', 'bin', 'crest')
+
+# Check whether a micromamba, mambaforge, anaconda3, miniconda3, or .conda crest_env
+# exists; if no environment path exists, fall back to a standalone binary
+# (see find_crest_executable() below).
+for crest_path in [crest_path1, crest_path2, crest_path3, crest_path4, crest_path5, crest_path6, crest_path7]:
+    if os.path.isfile(crest_path):
+        break
+
+
+def parse_version(folder_name):
+    """
+    Parses the version from the folder name and returns a tuple for comparison.
+    Supports versions like: 3.0.2, v212, 2.1, 2
+    """
+    version_regex = re.compile(r"(?:v?(\d+)(?:\.(\d+))?(?:\.(\d+))?)", re.IGNORECASE)
+    match = version_regex.search(folder_name)
+    if not match:
+        return (0, 0, 0)
+
+    major = int(match.group(1)) if match.group(1) else 0
+    minor = int(match.group(2)) if match.group(2) else 0
+    patch = int(match.group(3)) if match.group(3) else 0
+
+    # Example: v212 → (2, 1, 2)
+    if major >= 100 and match.group(2) is None and match.group(3) is None:
+        s = str(major).rjust(3, "0")
+        major = int(s[0])
+        minor = int(s[1])
+        patch = int(s[2])
+
+    return (major, minor, patch)
+
+
+def find_highest_version_in_directory(directory, name_contains):
+    """
+    Find the 'crest' executable with the highest version among subfolders of a
+    directory whose names contain a specific string.
+ """ + if not os.path.exists(directory): + return None + + highest_version_path = None + highest_version = () + + for folder in os.listdir(directory): + file_path = os.path.join(directory, folder) + if name_contains.lower() in folder.lower() and os.path.isdir(file_path): + crest_path = os.path.join(file_path, "crest") + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + version = parse_version(folder) + if highest_version == () or version > highest_version: + highest_version = version + highest_version_path = crest_path + return highest_version_path + + +def find_crest_executable(): + """ + Returns (crest_path, env_cmd): + + - crest_path: full path to 'crest' + - env_cmd: shell snippet to activate its environment (may be "") + """ + # Priority 1: /Local/ce_dana standalone builds + crest_path = find_highest_version_in_directory("/Local/ce_dana", "crest") + if crest_path and os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # Standalone binary: no env activation needed + return crest_path, "" + + # Priority 2: Conda/Mamba/Micromamba envs + home = os.path.expanduser("~") + potential_env_paths = [ + os.path.join(home, "anaconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniforge3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, ".conda", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "mambaforge", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "micromamba", "envs", "crest_env", "bin", "crest"), + ] + + # Also check the current env's bin + current_env_bin = os.path.dirname(sys.executable) + potential_env_paths.insert(0, os.path.join(current_env_bin, "crest")) + + for crest_path in potential_env_paths: + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # env_root = .../anaconda3 or .../miniforge3 or .../mambaforge etc. + env_root = crest_path.split("/envs/crest_env/")[0] + if "micromamba" in crest_path: + env_cmd = ( + f"source {env_root}/etc/profile.d/micromamba.sh && " + f"micromamba activate crest_env" + ) + elif any( + name in env_root + for name in ("anaconda3", "miniconda3", "miniforge3", "mambaforge", ".conda") + ): + env_cmd = ( + f"source {env_root}/etc/profile.d/conda.sh && " + f"conda activate crest_env" + ) + else: + # If for some reason it's just a random prefix with crest in bin + env_cmd = "" + return crest_path, env_cmd + + # Priority 3: PATH + crest_in_path = shutil.which("crest") + if crest_in_path: + return crest_in_path, "" + + return None, None + + +CREST_PATH, CREST_ENV_PATH = find_crest_executable() diff --git a/arc/species/converter.py b/arc/species/converter.py index b8ec5e7d06..b2d7a5126e 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -5,6 +5,7 @@ import math import numpy as np import os +import warnings from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from ase import Atoms @@ -44,6 +45,99 @@ ob.obErrorLog.SetOutputLevel(0) logger = get_logger() +def reorder_xyz_string(xyz_str: str, + reverse_atoms: bool = False, + units: str = 'angstrom', + convert_to: str = 'angstrom', + project_directory: Optional[str] = None + ) -> str: + """ + Reorder an XYZ string between ``ATOM X Y Z`` and ``X Y Z ATOM`` with optional unit conversion. + + Args: + xyz_str (str): The string xyz format to be converted. + reverse_atoms (bool, optional): Whether to reverse the atoms and coordinates. 
diff --git a/arc/species/converter.py b/arc/species/converter.py
index b8ec5e7d06..b2d7a5126e 100644
--- a/arc/species/converter.py
+++ b/arc/species/converter.py
@@ -5,6 +5,7 @@
 import math
 import numpy as np
 import os
+import warnings
 from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
 
 from ase import Atoms
@@ -44,6 +45,99 @@
 ob.obErrorLog.SetOutputLevel(0)
 
 logger = get_logger()
 
+
+def reorder_xyz_string(xyz_str: str,
+                       reverse_atoms: bool = False,
+                       units: str = 'angstrom',
+                       convert_to: str = 'angstrom',
+                       project_directory: Optional[str] = None,
+                       ) -> str:
+    """
+    Reorder an XYZ string between ``ATOM X Y Z`` and ``X Y Z ATOM`` with optional unit conversion.
+
+    Args:
+        xyz_str (str): The string xyz format to be converted.
+        reverse_atoms (bool, optional): Whether to reverse the atoms and coordinates.
+        units (str, optional): Units of the input coordinates ('angstrom' or 'bohr').
+        convert_to (str, optional): The units to convert to (either 'angstrom' or 'bohr').
+        project_directory (str, optional): The path to the project directory.
+
+    Raises:
+        ConverterError: If xyz_str is not a string (or a tuple/list of strings), or does not
+                        have four space-separated entries per non-empty line.
+
+    Returns: str
+        The converted string xyz format.
+    """
+    if isinstance(xyz_str, tuple):
+        xyz_str = '\n'.join(xyz_str)
+    if isinstance(xyz_str, list):
+        xyz_str = '\n'.join(xyz_str)
+    if not isinstance(xyz_str, str):
+        raise ConverterError(f'Expected a string input, got {type(xyz_str)}')
+    if project_directory is not None and os.path.isfile(os.path.join(project_directory, xyz_str)):
+        xyz_str = os.path.join(project_directory, xyz_str)
+
+    BOHR_TO_ANGSTROM = 0.529177
+    ANGSTROM_TO_BOHR = 1.8897259886
+
+    if units.lower() == 'angstrom' and convert_to.lower() == 'angstrom':
+        conversion_factor = 1
+    elif units.lower() == 'bohr' and convert_to.lower() == 'bohr':
+        conversion_factor = 1
+    elif units.lower() == 'angstrom' and convert_to.lower() == 'bohr':
+        conversion_factor = ANGSTROM_TO_BOHR
+    elif units.lower() == 'bohr' and convert_to.lower() == 'angstrom':
+        conversion_factor = BOHR_TO_ANGSTROM
+    else:
+        raise ConverterError("Invalid target unit. Choose 'angstrom' or 'bohr'.")
+
+    processed_lines = list()
+    # Peek at the first token to determine whether the atom symbol comes first or last
+    lxyz = xyz_str.strip().split()
+    atom_first = not is_str_float(lxyz[0])
+    # Now split the string into lines
+    lxyz = xyz_str.strip().splitlines()
+
+    for item in lxyz:
+        parts = item.strip().split()
+
+        if len(parts) != 4:
+            raise ConverterError(f'xyz_str has an incorrect format, expected 4 elements in each line, '
+                                 f'got "{item}" in:\n{xyz_str}')
+        if atom_first:
+            atom, x_str, y_str, z_str = parts
+        else:
+            x_str, y_str, z_str, atom = parts
+
+        try:
+            x = float(x_str) * conversion_factor
+            y = float(y_str) * conversion_factor
+            z = float(z_str) * conversion_factor
+        except ValueError as e:
+            raise ConverterError(f'Could not convert {x_str}, {y_str}, or {z_str} to floats.') from e
+
+        if reverse_atoms and atom_first:
+            formatted_line = f'{x} {y} {z} {atom}'
+        elif reverse_atoms and not atom_first:
+            formatted_line = f'{atom} {x} {y} {z}'
+        elif not reverse_atoms and atom_first:
+            formatted_line = f'{atom} {x} {y} {z}'
+        elif not reverse_atoms and not atom_first:
+            formatted_line = f'{x} {y} {z} {atom}'
+
+        processed_lines.append(formatted_line)
+
+    return '\n'.join(processed_lines)
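+
+
+# Example (illustrative): reorder_xyz_string('H 0.0 0.0 1.0', reverse_atoms=True,
+# convert_to='bohr') returns '0.0 0.0 1.8897259886 H'.
+
+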
+ """ + warnings.warn("str_to_str was renamed to reorder_xyz_string", DeprecationWarning) + return reorder_xyz_string(*args, **kwargs) + def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None, diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index 8203d2f56c..9f2b7d25b9 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -686,6 +686,25 @@ def test_str_to_xyz(self): xyz = converter.str_to_xyz(xyz_format) self.assertEqual(xyz, expected_xyz) + def test_reorder_xyz_string_atom_first(self): + """Test reordering atom-first XYZ strings with unit conversion""" + xyz_format = "C 0.0 1.0 2.0\nH -1.0 0.5 0.0" + converted = converter.reorder_xyz_string(xyz_str=xyz_format, reverse_atoms=True, convert_to="bohr") + expected = "0.0 1.8897259886 3.7794519772 C\n-1.8897259886 0.9448629943 0.0 H" + self.assertEqual(converted, expected) + + def test_reorder_xyz_string_coordinate_first(self): + """Test reordering coordinate-first XYZ strings back to atom-last order with conversion""" + xyz_format = "0.0 0.0 0.0 N\n1.0 0.0 0.0 H" + converted = converter.reorder_xyz_string( + xyz_str=xyz_format, + reverse_atoms=False, + units="bohr", + convert_to="angstrom", + ) + expected = "0.0 0.0 0.0 N\n0.529177 0.0 0.0 H" + self.assertEqual(converted, expected) + def test_xyz_to_str(self): """Test converting an ARC xyz format to a string xyz format""" xyz_str1 = converter.xyz_to_str(xyz_dict=self.xyz1['dict']) diff --git a/arc/species/species.py b/arc/species/species.py index 882060b221..eebcf048e8 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1525,6 +1525,7 @@ def cluster_tsgs(self): for tsg in self.ts_guesses: for cluster_tsg in cluster_tsgs: if cluster_tsg.almost_equal_tsgs(tsg): + logger.info(f"Similar TSGuesses found: {tsg.index} is similar to {cluster_tsg.index}") cluster_tsg.cluster.append(tsg.index) if tsg.method not in cluster_tsg.method: cluster_tsg.method += f' + {tsg.method}' diff --git a/arc/statmech/arkane.py b/arc/statmech/arkane.py index 581676e25e..75bab9a96f 100644 --- a/arc/statmech/arkane.py +++ b/arc/statmech/arkane.py @@ -671,6 +671,141 @@ def _section_contains_key(file_path: str, section_start: str, section_end: str, return False +def _normalize_method(method: str) -> str: + """ + Normalize method names for comparison: + - lowercase + - remove all hyphens + + Examples: + "DLPNO-CCSD(T)-F12" -> "dlpnoccsd(t)f12" + "dlpnoccsd(t)f122023" -> "dlpnoccsd(t)f122023" + """ + return method.lower().replace('-', '') + + +def _split_method_year(method_norm: str): + """ + Split a normalized method into (base_method, year). + + Examples: + "dlpnoccsd(t)f122023" -> ("dlpnoccsd(t)f12", 2023) + "dlpnoccsd(t)f12" -> ("dlpnoccsd(t)f12", None) + """ + m = re.match(r"^(.*?)(\d{4})$", method_norm) + if not m: + return method_norm, None + base, year_str = m.groups() + return base, int(year_str) + + +def _normalize_basis(basis: Optional[str]) -> Optional[str]: + """ + Normalize basis names for comparison: + - lowercase + - remove hyphens and spaces + + Examples: + "cc-pVTZ-F12" -> "ccpvtzf12" + "ccpvtzf12" -> "ccpvtzf12" + """ + if basis is None: + return None + return basis.replace('-', '').replace(' ', '').lower() + + +def _parse_lot_params(lot_str: str) -> dict: + """ + Parse method, basis, and software from a LevelOfTheory(...) string. 
+
+    Example lot_str:
+        "LevelOfTheory(method='dlpnoccsd(t)f122023',basis='ccpvtzf12',software='orca')"
+    """
+    params = {'method': None, 'basis': None, 'software': None}
+    for key in params.keys():
+        m = re.search(rf"{key}='([^']+)'", lot_str)
+        if m:
+            params[key] = m.group(1)
+    return params
+
+
+def _iter_level_keys_from_section(file_path: str,
+                                  section_start: str,
+                                  section_end: str) -> list[str]:
+    """
+    Return all LevelOfTheory(...) key strings that appear as dictionary keys
+    in a given section of data.py.
+
+    These look like:
+        "LevelOfTheory(method='...',basis='...',software='...')" : { ... }
+    """
+    section = _extract_section(file_path, section_start, section_end)
+    if section is None:
+        return []
+
+    # Match things like: "LevelOfTheory(...)" : { ... }
+    pattern = r'"(LevelOfTheory\([^"]*\))"\s*:'
+    return re.findall(pattern, section, flags=re.DOTALL)
+
+
+def _find_best_level_key_for_sp_level(level: "Level",
+                                      file_path: str,
+                                      section_start: str,
+                                      section_end: str) -> Optional[str]:
+    """
+    Given an ARC Level and a data.py section, find the LevelOfTheory(...) key string
+    that best matches the level's method/basis, allowing:
+      - hyphen-insensitive comparison
+      - an optional 4-digit year suffix in Arkane's method
+    and choose the *latest* year among matching entries.
+    """
+    if level is None or level.method is None:
+        return None
+
+    target_method_norm = _normalize_method(level.method)
+    target_base, _ = _split_method_year(target_method_norm)
+    target_basis_norm = _normalize_basis(level.basis)
+    target_software = level.software.lower() if level.software else None
+
+    best_key = None
+    best_year = -1
+
+    for lot_str in _iter_level_keys_from_section(file_path, section_start, section_end):
+        params = _parse_lot_params(lot_str)
+        cand_method = params.get('method')
+        cand_basis = params.get('basis')
+        cand_sw = params.get('software')
+
+        if cand_method is None:
+            continue
+
+        cand_method_norm = _normalize_method(cand_method)
+        cand_base, cand_year = _split_method_year(cand_method_norm)
+
+        # method base must match
+        if cand_base != target_base:
+            continue
+
+        # basis must match (normalized), if we have one
+        if target_basis_norm is not None:
+            cand_basis_norm = _normalize_basis(cand_basis)
+            if cand_basis_norm != target_basis_norm:
+                continue
+
+        # if user specified software, prefer matching software;
+        # but don't *require* it to exist in data.py
+        if target_software is not None and cand_sw is not None:
+            if cand_sw.lower() != target_software:
+                continue
+
+        year_val = cand_year if cand_year is not None else 0
+        if year_val > best_year:
+            best_year = year_val
+            best_key = lot_str
+
+    return best_key
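+
+
+# Example (illustrative): for Level(method='DLPNO-CCSD(T)-F12', basis='cc-pVTZ-F12'),
+# the keys "LevelOfTheory(method='dlpnoccsd(t)f12',basis='ccpvtzf12')" and
+# "LevelOfTheory(method='dlpnoccsd(t)f122023',basis='ccpvtzf12',software='orca')"
+# both match; the 2023 entry wins because it carries the latest year.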
""" if sp_level.method_type == 'composite': + # Composite Gaussian methods: no basis / year complications here return f"LevelOfTheory(method='{sp_level.method}',software='gaussian')" qm_corr_file = os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py') @@ -714,40 +850,40 @@ def get_arkane_model_chemistry(sp_level: 'Level', freq_dict_start = "freq_dict = {" freq_dict_end = "}" - sp_repr = _level_to_str(sp_level) - quoted_sp_repr = f'"{sp_repr}"' - + # ---- Case 1: User supplied explicit frequency scale factor ---- + # We only need an energy level (AEC entry in atom_energies) if freq_scale_factor is not None: - found = _section_contains_key(file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr) - if not found: + best_energy = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, atom_energies_start, atom_energies_end + ) + if best_energy is None: + # No matching AEC level in Arkane DB return None - return sp_repr + # modelChemistry = LevelOfTheory(...) + return best_energy + # ---- Case 2: CompositeLevelOfTheory (separate freq and energy levels) ---- if freq_level is None: raise ValueError("freq_level required when freq_scale_factor isn't provided") - freq_repr = _level_to_str(freq_level) - quoted_freq_repr = f'"{freq_repr}"' - - found_sp = _section_contains_key(file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr) - found_freq = _section_contains_key(file_path=qm_corr_file, - section_start=freq_dict_start, - section_end=freq_dict_end, - target=quoted_freq_repr) + best_energy = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, atom_energies_start, atom_energies_end + ) + best_freq = _find_best_level_key_for_sp_level( + freq_level, qm_corr_file, freq_dict_start, freq_dict_end + ) - if not found_sp or not found_freq: + if best_energy is None or best_freq is None: + # If either is missing, cannot construct a valid composite model chemistry return None - return (f"CompositeLevelOfTheory(\n" - f" freq={freq_repr},\n" - f" energy={sp_repr}\n" - f")") + # These strings are LevelOfTheory(...) expressions usable directly in Arkane input + return ( + "CompositeLevelOfTheory(\n" + f" freq={best_freq},\n" + f" energy={best_energy}\n" + ")" + ) def check_arkane_bacs(sp_level: 'Level', @@ -757,13 +893,11 @@ def check_arkane_bacs(sp_level: 'Level', """ Check that Arkane has AECs and BACs for the given sp level of theory. - Args: - sp_level (Level): Level of theory for energy. - bac_type (str): Type of bond additivity correction ('p' for Petersson, 'm' for Melius) - raise_error (bool): Whether to raise an error if AECs or BACs are missing. - - Returns: - bool: True if both AECs and BACs are available, False otherwise. + Uses plain-text parsing of quantum_corrections/data.py, matching LevelOfTheory + keys by: + - method base (ignore hyphens + optional year) + - basis (normalized) + and picking the latest year where multiple exist. 
""" qm_corr_file = os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py') @@ -776,24 +910,25 @@ def check_arkane_bacs(sp_level: 'Level', bac_section_start = "pbac = {" bac_section_end = "mbac = {" - sp_repr = _level_to_str(sp_level) - quoted_sp_repr = f'"{sp_repr}"' - - has_aec = _section_contains_key( - file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr, + best_aec_key = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, atom_energies_start, atom_energies_end ) - has_bac = _section_contains_key( - file_path=qm_corr_file, - section_start=bac_section_start, - section_end=bac_section_end, - target=quoted_sp_repr, + best_bac_key = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, bac_section_start, bac_section_end ) + + has_aec = best_aec_key is not None + has_bac = best_bac_key is not None has_encorr = bool(has_aec and has_bac) + + # For logging, prefer the matched key; fall back to the naive LevelOfTheory string + repr_level = best_aec_key if best_aec_key is not None else _level_to_str(sp_level) + if not has_encorr: - mssg = f"Arkane does not have the required energy corrections for {sp_repr} (AEC: {has_aec}, BAC: {has_bac})" + mssg = ( + f"Arkane does not have the required energy corrections for {repr_level} " + f"(AEC: {has_aec}, BAC: {has_bac})" + ) if raise_error: raise ValueError(mssg) else: @@ -801,6 +936,7 @@ def check_arkane_bacs(sp_level: 'Level', return has_encorr + def parse_species_thermo(species, output_content: str) -> None: """Parse thermodynamic data for a single species.""" # Parse E0 diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt deleted file mode 100644 index 17a55b3536..0000000000 --- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt +++ /dev/null @@ -1,17 +0,0 @@ -=>> PBS: job killed: walltime 86415 exceeded limit 86400 -Error: software termination - rax fffffffffffffffc, rbx 00007ffc0d4f90d0, rcx ffffffffffffffff - rdx 0000000000000000, rsp 00007ffc0d4f9098, rbp 0000000000000001 - rsi 00007ffc0d4f90d0, rdi 0000000000038f1b, r8 00002b7af22a5700 - r9 0000000000000000, r10 0000000000000000, r11 0000000000000246 - r12 00007ffc0d4f90f0, r13 000000000000008f, r14 0000000000000000 - r15 00007ffc0d4fff40 -Error: software termination - rax 0000000000024fa8, rbx 00002ae812e9f2c0, rcx 0000000000035498 - rdx 00002ae8c4888bd0, rsp 00007ffde70fb680, rbp 00007ffde70fbf70 - rsi 00002ae8c48be068, rdi 00002ae8c48f3508, r8 00002ae8c49289b0 - r9 0000000000006a93, r10 0000000000006a95, r11 00002ae812ed4768 - r12 00002ae812f66508, r13 00002ae812f9b9b0, r14 0000000000006a92 - r15 00002ae81311f478 - --- traceback not available - --- traceback not available diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf deleted file mode 100644 index 36f9d855ac..0000000000 --- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf +++ /dev/null @@ -1,12 +0,0 @@ -%chk=check.chk -%mem=14336mb -%NProcShared=8 - -#P opt=(calcfc) cbs-qb3 IOp(2/9=2000) - -spc1 - -0 3 -O 0.00000000 0.00000000 1.00000000 - - diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub deleted file mode 100644 index 
index 00b840cd67..0000000000
--- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash -l
-#SBATCH -p normal
-#SBATCH -J server1
-#SBATCH -N 1
-#SBATCH -n 8
-#SBATCH --time=120:00:00
-#SBATCH --mem-per-cpu=15770
-#SBATCH -o out.txt
-#SBATCH -e err.txt
-
-export g16root=/home/gridsan/groups/GRPAPI/Software
-export PATH=$g16root/g16/:$g16root/gv:$PATH
-which g16
-
-echo "============================================================"
-echo "Job ID : $SLURM_JOB_ID"
-echo "Job Name : $SLURM_JOB_NAME"
-echo "Starting on : $(date)"
-echo "Running on node : $SLURMD_NODENAME"
-echo "Current directory : $(pwd)"
-echo "============================================================"
-
-touch initial_time
-
-GAUSS_SCRDIR=/state/partition1/user//$SLURM_JOB_NAME-$SLURM_JOB_ID
-export $GAUSS_SCRDIR
-. $g16root/g16/bsd/g16.profile
-
-mkdir -p $GAUSS_SCRDIR
-
-g16 < input.gjf > input.log
-
-rm -rf $GAUSS_SCRDIR
-
-touch final_time
-
-
\ No newline at end of file
diff --git a/devtools/crest_environment.yml b/devtools/crest_environment.yml
new file mode 100644
index 0000000000..2291e72d37
--- /dev/null
+++ b/devtools/crest_environment.yml
@@ -0,0 +1,6 @@
+name: crest_env
+channels:
+  - conda-forge
+dependencies:
+  - python>=3.7
+  - crest=2.12
diff --git a/devtools/install_all.sh b/devtools/install_all.sh
index c958fdd548..e4fe750ea2 100644
--- a/devtools/install_all.sh
+++ b/devtools/install_all.sh
@@ -26,6 +26,8 @@ run_devtool () { bash "$DEVTOOLS_DIR/$1" "${@:2}"; }
 SKIP_CLEAN=false
 SKIP_EXT=false
 SKIP_ARC=false
+SKIP_RMG=false
+ARC_INSTALLED=false
 RMG_ARGS=()
 ARC_ARGS=()
 EXT_ARGS=()
@@ -36,6 +38,7 @@ while [[ $# -gt 0 ]]; do
     --no-clean) SKIP_CLEAN=true ;;
     --no-ext)   SKIP_EXT=true ;;
     --no-arc)   SKIP_ARC=true ;;
+    --no-rmg)   SKIP_RMG=true ;;
     --rmg-*) RMG_ARGS+=("--${1#--rmg-}") ;;
     --arc-*) ARC_ARGS+=("--${1#--arc-}") ;;
     --ext-*) EXT_ARGS+=("--${1#--ext-}") ;;
@@ -44,6 +47,7 @@ Usage: $0 [global-flags] [--rmg-xxx] [--arc-yyy] [--ext-zzz]
   --no-clean   Skip micromamba/conda cache cleanup
   --no-ext     Skip external tools (AutoTST, KinBot, …)
+  --no-rmg     Skip RMG-Py entirely
   --rmg-path   Forward '--path' to RMG installer
   --rmg-pip    Forward '--pip' to RMG installer
   ...
@@ -67,16 +71,15 @@ echo "  EXT sub-flags : ${EXT_ARGS[*]:-(none)}"
 echo ">>> Beginning full ARC external repo installation…"
 pushd . >/dev/null
 
-# 1) RMG
-echo "=== Installing RMG ==="
-run_devtool install_rmg.sh "${RMG_ARGS[@]}"
-
-
-  # 2) PyRDL
-  echo "=== Installing PyRDL ==="
-  bash devtools/install_pyrdl.sh
+# 1) RMG (optional)
+if [[ $SKIP_RMG == false ]]; then
+  echo "=== Installing RMG ==="
+  run_devtool install_rmg.sh "${RMG_ARGS[@]}"
+else
+  echo "ℹ️ --no-rmg flag set. Skipping RMG installation."
+fi
 
-# 3) ARC itself (skip env creation in CI or if user requests it)
+# 2) ARC itself (skip env creation in CI or if user requests it)
 if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then
   if [[ $SKIP_CLEAN == false ]]; then
     echo "=== Cleaning up old ARC build artifacts ==="
@@ -88,10 +91,20 @@
 
   echo "=== Installing ARC ==="
   run_devtool install_arc.sh "${ARC_ARGS[@]}"
+  ARC_INSTALLED=true
 else
+  ARC_INSTALLED=false
   echo ":information_source: CI detected or --no-arc flag set. Skip cleaning ARC installation."
 fi
 
+# 3) PyRDL (depends on ARC)
+if [[ $ARC_INSTALLED == true ]]; then
+  echo "=== Installing PyRDL ==="
+  bash devtools/install_pyrdl.sh
+else
+  echo "ℹ️ Skipping PyRDL install because ARC installation was skipped."
+fi
+
 if [[ $SKIP_EXT == false ]]; then
   # map of friendly names → installer scripts
   declare -A EXT_INSTALLERS=(
@@ -100,6 +113,7 @@ if [[ $SKIP_EXT == false ]]; then
     [KinBot]=install_kinbot.sh
     [OpenBabel]=install_ob.sh
     [xtb]=install_xtb.sh
+    [CREST]=install_crest.sh
     [Sella]=install_sella.sh
     [TorchANI]=install_torchani.sh
   )
diff --git a/devtools/install_autotst.sh b/devtools/install_autotst.sh
index 5e3bc35288..7396326fde 100644
--- a/devtools/install_autotst.sh
+++ b/devtools/install_autotst.sh
@@ -51,15 +51,36 @@ write_hook () {
   cat >"$act" <<EOF
+export AUTOTST_ROOT="$repo"
+export AUTOTST_OLD_PATH="\$PATH"
+export AUTOTST_OLD_PYTHONPATH="\${PYTHONPATH:-}"
+EOF
+
+  if [[ -n "${RMG_PY_PATH:-}" ]]; then
+    cat >>"$act" <<'EOF'
+# Remove RMG-Py from PATH/PYTHONPATH to avoid clashes while AutoTST is active.
+if [[ -n "${RMG_PY_PATH:-}" ]]; then
+  export PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")"
+  export PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")"
+fi
+EOF
+  fi
+
+  cat >>"$act" <<'EOF'
 case ":\$PYTHONPATH:" in *":\$AUTOTST_ROOT:"*) ;; \
   *) export PYTHONPATH="\$AUTOTST_ROOT:\${PYTHONPATH:-}" ;; esac
 EOF
 
   # --- de-activation -----------------------------------------------------
   cat >"$deact" <<'EOF'
-_strip () { local n=":$1:"; local s=":$2:"; echo "${s//$n/:}" | sed 's/^://;s/:$//'; }
-export PYTHONPATH=$(_strip "$AUTOTST_ROOT" ":${PYTHONPATH:-}:")
-unset AUTOTST_ROOT
+export PATH="${AUTOTST_OLD_PATH:-$PATH}"
+if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then
+  export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH"
+else
+  unset PYTHONPATH
+fi
+unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH
 EOF
   echo "🔗 AutoTST hook refreshed in $env"
 }
@@ -115,12 +136,48 @@
 
 if [[ $MODE == "path" ]]; then
-  AUTO_PATH_LINE="export PYTHONPATH=\"\$PYTHONPATH:$(pwd)\""
-  if ! grep -Fqx "$AUTO_PATH_LINE" ~/.bashrc; then
-    echo "$AUTO_PATH_LINE" >> ~/.bashrc
-    echo "✔️ Added AutoTST path to ~/.bashrc"
+  HOOK_SENTINEL="# AutoTST path-mode hook"
+  if ! grep -Fqx "$HOOK_SENTINEL" ~/.bashrc; then
+    cat <<'EOF' >> ~/.bashrc
+# AutoTST path-mode hook
+_strip_path () {
+  local needle=":$1:"
+  local haystack=":$2:"
+  echo "${haystack//$needle/:}" | sed 's/^://;s/:$//'
+}
+
+autotst_on () {
+  export AUTOTST_ROOT="__AUTOTST_PATH__"
+  export AUTOTST_OLD_PATH="$PATH"
+  export AUTOTST_OLD_PYTHONPATH="${PYTHONPATH:-}"
+  if [[ -n "${RMG_PY_PATH:-}" ]]; then
+    PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")"
+    PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")"
+  fi
+
+  case ":$PYTHONPATH:" in *":$AUTOTST_ROOT:"*) ;; \
+    *) PYTHONPATH="$AUTOTST_ROOT:${PYTHONPATH:-}" ;; esac
+  export PATH PYTHONPATH
+}
+
+autotst_off () {
+  export PATH="${AUTOTST_OLD_PATH:-$PATH}"
+  if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then
+    export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH"
+  else
+    unset PYTHONPATH
+  fi
+  unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH
+}
+
+# Enable AutoTST by default in new shells and keep RMG-Py out of the way.
+autotst_on
+EOF
+    # replace placeholder with the actual path ('#' delimiters, so '/' needs no escaping)
+    sed -i "s#__AUTOTST_PATH__#$(pwd)#" ~/.bashrc
+    echo "✔️ Added AutoTST path-mode hook to ~/.bashrc"
   else
-    echo "ℹ️ AutoTST path already exists in ~/.bashrc"
+    echo "ℹ️ AutoTST path-mode hook already exists in ~/.bashrc"
   fi
 elif [[ $MODE == "conda" ]]; then
   write_hook tst_env "$(pwd)"
diff --git a/devtools/install_crest.sh b/devtools/install_crest.sh
new file mode 100644
index 0000000000..f6df81e2a5
--- /dev/null
+++ b/devtools/install_crest.sh
@@ -0,0 +1,63 @@
+#!/bin/bash -l
+set -eo pipefail
+
+if command -v micromamba &> /dev/null; then
+    echo "✔️ Micromamba is installed."
+    COMMAND_PKG=micromamba
+elif command -v mamba &> /dev/null; then
+    echo "✔️ Mamba is installed."
+    COMMAND_PKG=mamba
+elif command -v conda &> /dev/null; then
+    echo "✔️ Conda is installed."
+    COMMAND_PKG=conda
+else
+    echo "❌ Micromamba, Mamba, or Conda is required. Please install one."
+    exit 1
+fi
+
+if [ "$COMMAND_PKG" = "micromamba" ]; then
+    eval "$(micromamba shell hook --shell=bash)"
+else
+    BASE=$(conda info --base)
+    . "$BASE/etc/profile.d/conda.sh"
+fi
+
+ENV_FILE="devtools/crest_environment.yml"
+
+if [ ! -f "$ENV_FILE" ]; then
+    echo "❌ File not found: $ENV_FILE"
+    exit 1
+fi
+
+if $COMMAND_PKG env list | grep -q '^crest_env\s'; then
+    echo ">>> Updating existing crest_env..."
+    $COMMAND_PKG env update -n crest_env -f "$ENV_FILE" --prune
+else
+    echo ">>> Creating new crest_env..."
+    $COMMAND_PKG env create -n crest_env -f "$ENV_FILE" -y
+fi
+
+if [ "$COMMAND_PKG" = "micromamba" ]; then
+    micromamba activate crest_env
+else
+    conda activate crest_env
+fi
+
+echo ">>> Checking CREST installation..."
+
+if crest --version &> /dev/null; then
+    version_output=$(crest --version 2>&1 | head -n 1)
+    echo "$version_output"
+    if ! grep -q "2\.12" <<< "$version_output"; then
+        echo "❌ CREST version mismatch (expected 2.12)."
+        exit 1
+    fi
+    echo "✔️ CREST 2.12 is successfully installed."
+else
+    echo "❌ CREST is not found in PATH. Please check the environment."
+    exit 1
+fi
+
+$COMMAND_PKG deactivate
+
+echo "✅ Done installing CREST (crest_env)."
diff --git a/devtools/install_gcn.sh b/devtools/install_gcn.sh
index 8f83a2cda1..5273353d77 100644
--- a/devtools/install_gcn.sh
+++ b/devtools/install_gcn.sh
@@ -93,12 +93,12 @@ write_hook() {  # env_name repo_path
     rm -f "$act" "$deact"
 
     # --- activation hook -----------------------------------------------------
-    cat <<'ACTHOOK' >"$act"
+    cat <<ACTHOOK >"$act"
 # TS-GCN hook – $(date +%F)
 export TSGCN_ROOT="$repo"
-case ":$PYTHONPATH:" in
-    *":$TSGCN_ROOT:") ;; \
-    *) export PYTHONPATH="$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
+case ":\$PYTHONPATH:" in
+    *":\$TSGCN_ROOT:"*) ;; \
+    *) export PYTHONPATH="\$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
 esac
 ACTHOOK
@@ -182,46 +182,43 @@ CORE_PKGS=(
 
 # ── inline env creation & unified PyTorch install --------------------------
 if $COMMAND_PKG env list | awk '{print $1}' | grep -qx ts_gcn; then
-    $COMMAND_PKG env update -n ts_gcn \
+    $COMMAND_PKG install -n ts_gcn \
         -c schrodinger -c conda-forge \
         --channel-priority flexible \
         "${CORE_PKGS[@]}" \
-        --prune -y
+        --yes
 else
-    $COMMAND_PKG env create -n ts_gcn \
+    $COMMAND_PKG create -n ts_gcn \
         -c schrodinger -c conda-forge \
         --channel-priority flexible \
         "${CORE_PKGS[@]}" \
-        -y
+        --yes
 fi
 
-# 2) activate it - we set +u to avoid printing variable names
-#    that are not set yet
-set +u; $COMMAND_PKG activate ts_gcn; set -u
-
-# 3) pip-install exactly the CPU or CUDA wheels (no ROCm on that index)
-WHEEL=https://download.pytorch.org/whl/torch_stable.html
-if [[ $CUDA_VERSION == cpu ]]; then
-    pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
-else
-    pip install torch==1.7.1+${CUDA_VERSION} \
-        torchvision==0.8.2+${CUDA_VERSION} \
-        torchaudio==0.7.2+${CUDA_VERSION} \
-        -f $WHEEL
-fi
-
-# for PyG wheels use the official PyG index—with a real '+' in the URL
-TORCH_VER=1.7.1
-WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
-
-# install ONLY the prebuilt binaries, never fall back to source
-pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
-pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
-pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
-pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
-
-# finally the meta-package (this one can install from PyPI)
-pip install torch-geometric
-echo "✅ ts_gcn environment ready"
+# 2) pip-install exactly the CPU or CUDA wheels (no ROCm on that index)
+PIP_RUN=("$COMMAND_PKG" run -n ts_gcn)
+WHEEL=https://download.pytorch.org/whl/torch_stable.html
+if [[ $CUDA_VERSION == cpu ]]; then
+    "${PIP_RUN[@]}" pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
+else
+    "${PIP_RUN[@]}" pip install torch==1.7.1+${CUDA_VERSION} \
+        torchvision==0.8.2+${CUDA_VERSION} \
+        torchaudio==0.7.2+${CUDA_VERSION} \
+        -f $WHEEL
+fi
+
+# for PyG wheels use the official PyG index—with a real '+' in the URL
+TORCH_VER=1.7.1
+WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
+
+# install ONLY the prebuilt binaries, never fall back to source
+"${PIP_RUN[@]}" pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
+"${PIP_RUN[@]}" pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
+"${PIP_RUN[@]}" pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
+"${PIP_RUN[@]}" pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
+
+# finally the meta-package (this one can install from PyPI)
+"${PIP_RUN[@]}" pip install torch-geometric
+echo "✅ ts_gcn environment ready"
 
 # ── write hooks into conda envs if required -------------------------------
 if [[ $MODE == conda ]]; then
diff --git a/devtools/install_pyrdl.sh b/devtools/install_pyrdl.sh
index 2b2cc9340c..87f1ccf454 100644
--- a/devtools/install_pyrdl.sh
+++ b/devtools/install_pyrdl.sh
@@ -49,8 +49,8 @@ fi
 # Ensure CMake is installed in the environment
 if ! command -v cmake &> /dev/null; then
-    echo "Installing CMake..."
-    "$COMMAND_PKG" install -y cmake
+    echo "Installing CMake into arc_env..."
+    "$COMMAND_PKG" install -n arc_env -c conda-forge -y cmake
 fi
 
 # Clone and build RingDecomposerLib
diff --git a/devtools/install_rmg.sh b/devtools/install_rmg.sh
index 036a6449ce..0a35192627 100644
--- a/devtools/install_rmg.sh
+++ b/devtools/install_rmg.sh
@@ -176,21 +176,34 @@ fi
 ACTIVE_RE="^[[:space:]]*[^#].*${RMG_PY_PATH//\//\\/}"   # uncommented, contains path
 COMMENT_RE="^[[:space:]]*#.*${RMG_PY_PATH//\//\\/}"     # commented-out, contains path
 
-NEW_LINE='export PATH="$PATH:'"$RMG_PY_PATH"'"'
-
-# If PATH_ADD is true, add RMG-Py to PATH
+# If PATH_ADD is true, add RMG-Py to PATH/PYTHONPATH via bashrc
 if [ "$MODE" == path ]; then
     if grep -Eq "$ACTIVE_RE" "$RC"; then
         printf 'ℹ️ RMG-Py already active in %s\n' "$RC"
     elif grep -Eq "$COMMENT_RE" "$RC"; then
-        printf '✅ Found commented entry; adding new active line\n'
-        printf '\n# RMG-Py added on %s\n%s\n' "$(date +%F)" "$NEW_LINE" >> "$RC"
+        printf '✅ Found commented entry; adding new active block\n'
+        cat <<EOF >> "$RC"
+
+# RMG-Py added on $(date +%F)
+export RMG_PY_PATH="$RMG_PY_PATH"
+case ":\$PATH:" in *":\$RMG_PY_PATH:"*) ;; *) export PATH="\$PATH:\$RMG_PY_PATH" ;; esac
+case "\${PYTHONPATH:+:\$PYTHONPATH:}" in *":\$RMG_PY_PATH:"*) ;; \
+    *) export PYTHONPATH="\${PYTHONPATH:+\$PYTHONPATH:}\$RMG_PY_PATH" ;; esac
+EOF
     else
-        printf '✅ No entry found; adding new active line\n'
-        printf '\n# RMG-Py added on %s\n%s\n' "$(date +%F)" "$NEW_LINE" >> "$RC"
+        printf '✅ No entry found; adding new active block\n'
+        cat <<EOF >> "$RC"
+
+# RMG-Py added on $(date +%F)
+export RMG_PY_PATH="$RMG_PY_PATH"
+case ":\$PATH:" in *":\$RMG_PY_PATH:"*) ;; *) export PATH="\$PATH:\$RMG_PY_PATH" ;; esac
+case "\${PYTHONPATH:+:\$PYTHONPATH:}" in *":\$RMG_PY_PATH:"*) ;; \
+    *) export PYTHONPATH="\${PYTHONPATH:+\$PYTHONPATH:}\$RMG_PY_PATH" ;; esac
+EOF
     fi
 elif [ "$MODE" == conda ]; then
     # conda envs already have the RMG_PY_PATH in PATH, so no need to add it
diff --git a/devtools/install_torchani.sh b/devtools/install_torchani.sh
index 2ceca9ac5c..1536609abf 100644
--- a/devtools/install_torchani.sh
+++ b/devtools/install_torchani.sh
@@ -2,9 +2,10 @@
 set -euo pipefail
 
 # Enable tracing of each command, but tee it to a logfile
+LOGFILE="tani_env_setup.log"
 exec 3>&1 4>&2
 trap 'exec 2>&4 1>&3' EXIT
-exec 1> >(tee .log) 2>&1
+exec 1> >(tee "$LOGFILE") 2>&1
 set -x
 
 echo ">>> Starting TANI environment setup at $(date)"
@@ -53,7 +54,7 @@ fi
 echo ">>> Creating conda env from $ENV_YAML (name=$ENV_NAME)"
 if ! $COMMAND_PKG env create -n "$ENV_NAME" -f "$ENV_YAML" -v; then
     echo "❌ Environment creation failed. Dumping last 200 lines of log:"
-    tail -n 200 tani_env_setup.log
+    tail -n 200 "$LOGFILE"
     echo "---- Disk usage at failure ----"
     df -h .
     exit 1
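The case-statement guard that install_rmg.sh writes into the shell rc above is a general idempotent-PATH pattern; a minimal standalone sketch, with an illustrative directory name in place of the installer's $RMG_PY_PATH:

    # Append a directory to PATH only if it is not already present.
    dir="$HOME/RMG-Py"                     # illustrative; the installer uses $RMG_PY_PATH
    case ":$PATH:" in
        *":$dir:"*) ;;                     # already on PATH: do nothing
        *) export PATH="$PATH:$dir" ;;     # append exactly once
    esac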
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 0d32bd5b86..26f3cdac8b 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -51,12 +51,20 @@ Install dependencies
 
     conda activate arc_env
 
-- Install the latest **DEVELOPER** version of RMG (which has Arkane).
-  It is recommended to follow RMG's `Developer installation by source using Anaconda
-  `_ instructions.
-  Make sure to add RMG-Py to your PATH and PYTHONPATH variables as explained in RMG's documentation.
-- Type ``make install-all`` under the ARC repository folder to install the following 3rd party repositories:
+- RMG-Py/Arkane is required: ARC relies on it for thermochemistry, kinetics parsing, conformer handling, and file formats.
+  Without RMG-Py, many ARC workflows (e.g., thermo, Arkane post-processing) will not run.
+  You can either install RMG-Py manually following the `Developer install guide
+  <#for-developers-installation-by-source-using-anaconda-environment>`_ or let the ARC installers handle it automatically
+  (see below). Ensure RMG-Py is on ``PATH`` and ``PYTHONPATH`` (or use the conda hook option below) so ARC can import it.
+- Run ``make install`` under the ARC repository folder to install ARC plus RMG-Py/RMG-database (default bashrc/``PATH`` wiring),
+  PyRDL, and external tools (CREST, AutoTST, KinBot, TS-GCN, xTB, Sella, TorchANI, OpenBabel).
+  Use ``make install-lite`` (no externals) or call ``bash devtools/install_all.sh --no-rmg`` if you want to skip RMG.
+  These installers pin known-good versions (e.g., CREST 2.12) and set up per-tool conda environments where appropriate.
+  Additional installer knobs:
+
+  * ``bash devtools/install_all.sh --rmg-conda`` installs RMG-Py into a conda env and adds activation hooks (vs. the default bashrc PATH/PYTHONPATH wiring).
+  * ``--no-ext`` skips external tools; ``--no-arc`` skips ARC env creation/compile (useful in CI); ``--no-clean`` retains conda/mamba caches.
 
 `AutoTST `_ (`West et al. `_),
 `KinBot `_
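Taken together, the entry points documented above can be invoked as follows; a sketch assuming the Makefile targets and the install_all.sh flags introduced in this change:

    # Full install: ARC env, RMG-Py/RMG-database (bashrc wiring), PyRDL, all external tools.
    make install
    # Skip external tools (CREST, AutoTST, KinBot, TS-GCN, xTB, Sella, TorchANI, OpenBabel):
    bash devtools/install_all.sh --no-ext
    # Install RMG-Py into a conda env with activation hooks instead of bashrc PATH/PYTHONPATH lines:
    bash devtools/install_all.sh --rmg-conda
    # Skip RMG entirely (e.g., if an existing RMG-Py checkout is already wired up):
    bash devtools/install_all.sh --no-rmg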