From 8c1ef144d1bd1a35804693795eefb5cea56c1096 Mon Sep 17 00:00:00 2001 From: Calvin Date: Tue, 25 Nov 2025 12:55:10 +0200 Subject: [PATCH 1/9] Added Crest into heuristics Fixed Arkane to check LoT by ignoring the year suffix (plans to implement year in the input) AutoTST does not attempt to import autotst (due to its reliance on RMG-Py env). --- arc/job/adapters/ts/autotst_ts.py | 151 +++++++---- arc/job/adapters/ts/heuristics.py | 416 +++++++++++++++++++++++++++++- arc/settings/settings.py | 137 ++++++++++ arc/species/converter.py | 84 ++++++ arc/species/species.py | 1 + arc/statmech/arkane.py | 240 +++++++++++++---- 6 files changed, 924 insertions(+), 105 deletions(-) diff --git a/arc/job/adapters/ts/autotst_ts.py b/arc/job/adapters/ts/autotst_ts.py index bd2aabea0f..d2058ecddc 100644 --- a/arc/job/adapters/ts/autotst_ts.py +++ b/arc/job/adapters/ts/autotst_ts.py @@ -20,16 +20,19 @@ from arc.species.species import ARCSpecies, TSGuess, colliding_atoms HAS_AUTOTST = True -try: - from autotst.reaction import Reaction as AutoTST_Reaction -except (ImportError, ModuleNotFoundError): - HAS_AUTOTST = False +# try: +# from autotst.reaction import Reaction as AutoTST_Reaction +# except (ImportError, ModuleNotFoundError): +# HAS_AUTOTST = False if TYPE_CHECKING: from arc.level import Level + from autotst.reaction import Reaction as AutoTST_Reaction # noqa: F401 AUTOTST_PYTHON = settings['AUTOTST_PYTHON'] +if AUTOTST_PYTHON is None: + HAS_AUTOTST = False logger = get_logger() @@ -218,9 +221,14 @@ def execute_incore(self): """ Execute a job incore. """ - if not HAS_AUTOTST: - raise ModuleNotFoundError(f'Could not import AutoTST, make sure it is properly installed.\n' - f'See {self.url} for more information, or use the Makefile provided with ARC.') + # 1) Check that ARC knows *which* Python to use for AutoTST + if not AUTOTST_PYTHON: + raise ModuleNotFoundError( + "settings['AUTOTST_PYTHON'] is not set. " + "ARC cannot run AutoTST as a subprocess without this. " + "Set AUTOTST_PYTHON in your ARC settings to the Python executable of your tst_env." 
+ ) + self._log_job_execution() self.initial_time = self.initial_time if self.initial_time else datetime.datetime.now() @@ -234,76 +242,122 @@ def execute_incore(self): charge=rxn.charge, multiplicity=rxn.multiplicity, ) + reaction_label_fwd = get_autotst_reaction_string(rxn) - reaction_label_rev = get_autotst_reaction_string(ARCReaction(r_species=rxn.p_species, - p_species=rxn.r_species, - reactants=rxn.products, - products=rxn.reactants)) + reaction_label_rev = get_autotst_reaction_string( + ARCReaction( + r_species=rxn.p_species, + p_species=rxn.r_species, + reactants=rxn.products, + products=rxn.reactants, + ) + ) i = 0 - for reaction_label, direction in zip([reaction_label_fwd, reaction_label_rev], ['F', 'R']): - # run AutoTST as a subprocess in the desired direction - script_path = os.path.join(ARC_PATH, 'arc', 'job', 'adapters', 'scripts', 'autotst_script.py') - commands = ['source ~/.bashrc', f'"{AUTOTST_PYTHON}" "{script_path}" "{reaction_label}" "{self.output_path}"'] + for reaction_label, direction in zip( + [reaction_label_fwd, reaction_label_rev], + ['F', 'R'], + ): + script_path = os.path.join( + ARC_PATH, 'arc', 'job', 'adapters', 'scripts', 'autotst_script.py' + ) + # 2) Build the bash command to run tst_env’s Python on the script + commands = [ + 'source ~/.bashrc', + f'"{AUTOTST_PYTHON}" "{script_path}" "{reaction_label}" "{self.output_path}"', + ] command = '; '.join(commands) tic = datetime.datetime.now() - output = subprocess.run(command, shell=True, executable='/bin/bash') + # 3) Capture stdout/stderr so we can diagnose missing AutoTST + output = subprocess.run( + command, + shell=True, + executable='/bin/bash', + capture_output=True, + text=True, + ) tok = datetime.datetime.now() - tic if output.returncode: - direction_str = 'forward' if direction == 'F' else 'reverse' - logger.warning(f'AutoTST subprocess did not give a successful return code for {rxn} ' - f'in the {direction_str} direction.\n' - f'Got return code: {output.returncode}\n' - f'stdout: {output.stdout}\n' - f'stderr: {output.stderr}') + stderr = output.stderr or "" + stdout = output.stdout or "" + + # Special case: autotst itself is missing in tst_env + if 'No module named' in stderr and 'autotst' in stderr: + logger.error( + f"AutoTST subprocess failed for {rxn} because the 'autotst' " + f"package is not importable in the tst_env used by AUTOTST_PYTHON:\n" + f"{stderr}" + ) + else: + direction_str = 'forward' if direction == 'F' else 'reverse' + logger.warning( + f'AutoTST subprocess did not give a successful return code for {rxn} ' + f'in the {direction_str} direction.\n' + f'Got return code: {output.returncode}\n' + f'stdout: {stdout}\n' + f'stderr: {stderr}' + ) + + # 4) Check for the YAML output and add TS guesses as before if os.path.isfile(self.output_path): results = read_yaml_file(path=self.output_path) if results: for result in results: - xyz = xyz_from_data(coords=result['coords'], numbers=result['numbers']) + xyz = xyz_from_data( + coords=result['coords'], + numbers=result['numbers'], + ) unique = True for other_tsg in rxn.ts_species.ts_guesses: - if other_tsg.success and almost_equal_coords(xyz, other_tsg.initial_xyz): + if other_tsg.success and almost_equal_coords( + xyz, other_tsg.initial_xyz + ): if 'autotst' not in other_tsg.method.lower(): other_tsg.method += ' and AutoTST' unique = False break if unique and not colliding_atoms(xyz): - ts_guess = TSGuess(method='AutoTST', - method_direction=direction, - method_index=i, - t0=tic, - execution_time=tok, - xyz=xyz, - success=True, - 
index=len(rxn.ts_species.ts_guesses), - ) + ts_guess = TSGuess( + method='AutoTST', + method_direction=direction, + method_index=i, + t0=tic, + execution_time=tok, + xyz=xyz, + success=True, + index=len(rxn.ts_species.ts_guesses), + ) rxn.ts_species.ts_guesses.append(ts_guess) - save_geo(xyz=xyz, - path=self.local_path, - filename=f'AutoTST {direction}', - format_='xyz', - comment=f'AutoTST {direction}', - ) + save_geo( + xyz=xyz, + path=self.local_path, + filename=f'AutoTST {direction}', + format_='xyz', + comment=f'AutoTST {direction}', + ) i += 1 else: - ts_guess = TSGuess(method=f'AutoTST', - method_direction=direction, - method_index=i, - t0=tic, - execution_time=tok, - success=False, - index=len(rxn.ts_species.ts_guesses), - ) + ts_guess = TSGuess( + method='AutoTST', + method_direction=direction, + method_index=i, + t0=tic, + execution_time=tok, + success=False, + index=len(rxn.ts_species.ts_guesses), + ) rxn.ts_species.ts_guesses.append(ts_guess) i += 1 if len(self.reactions) < 5: - successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'autotst' in tsg.method]) + successes = len( + [tsg for tsg in rxn.ts_species.ts_guesses + if tsg.success and 'autotst' in tsg.method.lower()] + ) if successes: logger.info(f'AutoTST successfully found {successes} TS guesses for {rxn.label}.') else: @@ -311,6 +365,7 @@ def execute_incore(self): self.final_time = datetime.datetime.now() + def execute_queue(self): """ (Execute a job to the server's queue.) diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index aa281542ae..436e16b537 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -14,17 +14,36 @@ - Think: two H sites on a CH2 element, one being abstracted. On which one in the reactant do we put the abstractor? Can/should we try both? 
""" - import datetime import itertools +import math +import os +import re +import subprocess +import time from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +import re + +import numpy as np +import pandas as pd +from arc.imports import settings, submit_scripts from arc.common import almost_equal_coords, get_logger, is_angle_linear, is_xyz_linear, key_by_val from arc.job.adapter import JobAdapter +from arc.job.local import check_job_status, submit_job from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family from arc.job.factory import register_job_adapter from arc.plotter import save_geo -from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz +from arc.species.converter import ( + compare_zmats, + relocate_zmat_dummy_atoms_to_the_end, + str_to_str, + str_to_xyz, + xyz_to_dmat, + xyz_to_str, + zmat_from_xyz, + zmat_to_xyz, +) from arc.mapping.engine import map_two_species from arc.molecule.molecule import Molecule from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms @@ -34,6 +53,13 @@ from arc.level import Level from arc.reaction import ARCReaction +try: + CREST_PATH = settings["CREST_PATH"] + CREST_ENV_PATH = settings["CREST_ENV_PATH"] + HAS_CREST = True + SERVERS = settings["servers"] +except KeyError: + HAS_CREST = False DIHEDRAL_INCREMENT = 30 @@ -252,7 +278,7 @@ def execute_incore(self): if rxn.family == 'H_Abstraction': tsg = TSGuess(method='Heuristics') tsg.tic() - xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment) + xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment, path=self.local_path) tsg.tok() for method_index, xyz in enumerate(xyzs): @@ -836,6 +862,7 @@ def h_abstraction(reaction: 'ARCReaction', r2_stretch: float = 1.2, a2: float = 180, dihedral_increment: Optional[int] = None, + path: str = "" ) -> List[dict]: """ Generate TS guesses for reactions of the RMG ``H_Abstraction`` family. @@ -853,6 +880,7 @@ def h_abstraction(reaction: 'ARCReaction', Entries are Cartesian coordinates of TS guesses for all reactions. """ xyz_guesses = list() + crest_paths = list() dihedral_increment = dihedral_increment or DIHEDRAL_INCREMENT reactants_reversed, products_reversed = are_h_abs_wells_reversed(rxn=reaction, product_dict=reaction.product_dicts[0]) for product_dict in reaction.product_dicts: @@ -898,7 +926,7 @@ def h_abstraction(reaction: 'ARCReaction', d2_d3_product = [(None, None)] zmats = list() - for d2, d3 in d2_d3_product: + for iteration, (d2, d3) in enumerate(d2_d3_product): xyz_guess = None try: xyz_guess = combine_coordinates_with_redundant_atoms( @@ -930,7 +958,385 @@ def h_abstraction(reaction: 'ARCReaction', # This TS is unique, and has no atom collisions. 
zmats.append(zmat_guess) xyz_guesses.append(xyz_guess) + + if HAS_CREST: + xyz_guess_crest = xyz_guess.copy() + if isinstance(xyz_guess_crest, dict): + df_dmat = convert_xyz_to_df(xyz_guess_crest) + elif isinstance(xyz_guess_crest, str): + xyz = str_to_xyz(xyz_guess_crest) + df_dmat = convert_xyz_to_df(xyz) + elif isinstance(xyz_guess_crest, list): + xyz_temp = "\n".join(xyz_guess_crest) + xyz_to_dmat = str_to_xyz(xyz_temp) + df_dmat = convert_xyz_to_df(xyz_to_dmat) + + try: + h_abs_atoms_dict = get_h_abs_atoms(df_dmat) + crest_path = crest_ts_conformer_search( + xyz_guess_crest, + h_abs_atoms_dict["A"], + h_abs_atoms_dict["H"], + h_abs_atoms_dict["B"], + path=path, + xyz_crest_int=iteration, + ) + crest_paths.append(crest_path) + except (ValueError, KeyError) as e: + logger.error(f"Could not determine the H abstraction atoms, got:\n{e}") + + if crest_paths: + crest_jobs = submit_crest_jobs(crest_paths) + monitor_crest_jobs(crest_jobs) # Keep checking job statuses until complete + xyz_guesses_crest = process_completed_jobs(crest_jobs) + for xyz_guess_crest in xyz_guesses_crest: + zmat_guess = zmat_from_xyz(xyz_guess_crest, is_ts=True) + is_unique = True # Assume the current Z-matrix is unique + for existing_zmat_guess in zmats: + if compare_zmats(existing_zmat_guess, zmat_guess): + is_unique = False # Found a match, mark as not unique + break # Exit this inner loop only + if is_unique: + # If no match was found, append to lists + zmats.append(zmat_guess) + xyz_guesses.append(xyz_guess_crest) + else: + logger.error("No CREST paths found") + + return xyz_guesses + + +def crest_ts_conformer_search( + xyz_guess: dict, + a_atom: int, + h_atom: int, + b_atom: int, + path: str = "", + xyz_crest_int: int = 0, +) -> str: + """ + Prepare a CREST TS conformer search job: + - Write coords.ref and constraints.inp + - Write a PBS submit script using submit_scripts["local"]["crest"] + - Return the CREST job directory path + """ + path = os.path.join(path, f"crest_{xyz_crest_int}") + os.makedirs(path, exist_ok=True) + + # --- coords.ref --- + symbols = xyz_guess["symbols"] + converted_coords = str_to_str( + xyz_str=xyz_to_str(xyz_guess), + reverse_atoms=True, + convert_to="bohr", + ) + coords_ref_content = f"$coord\n{converted_coords}\n$end\n" + coords_ref_path = os.path.join(path, "coords.ref") + with open(coords_ref_path, "w") as f: + f.write(coords_ref_content) + + # --- constraints.inp --- + num_atoms = len(symbols) + # CREST uses 1-based indices + a_atom += 1 + h_atom += 1 + b_atom += 1 + + # All atoms not directly involved in A–H–B go into the metadynamics atom list + list_of_atoms_numbers_not_participating_in_reaction = [ + i for i in range(1, num_atoms + 1) if i not in [a_atom, h_atom, b_atom] + ] + + constraints_path = os.path.join(path, "constraints.inp") + with open(constraints_path, "w") as f: + f.write("$constrain\n") + f.write(f" atoms: {a_atom}, {h_atom}, {b_atom}\n") + f.write(" force constant: 0.5\n") + f.write(" reference=coords.ref\n") + f.write(f" distance: {a_atom}, {h_atom}, auto\n") + f.write(f" distance: {h_atom}, {b_atom}, auto\n") + f.write("$metadyn\n") + if list_of_atoms_numbers_not_participating_in_reaction: + f.write( + f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n' + ) + f.write("$end\n") + + # --- build CREST command string --- + # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8 + cpus = int(SERVERS["local"].get("cpus", 8)) + if CREST_ENV_PATH: + crest_exe = "crest" + else: + crest_exe = CREST_PATH if CREST_PATH is 
not None else "crest" + + commands = [ + crest_exe, + "coords.ref", + "--cinp constraints.inp", + "--noreftopo", + f'-T {SERVERS["local"].get("cpus", 8)}', + ] + command = " ".join(commands) + + # --- activation line (optional) --- + activation_line = CREST_ENV_PATH or "" + + if SERVERS.get("local") is not None: + cluster_soft = SERVERS["local"]["cluster_soft"].lower() + + if cluster_soft in ["condor", "htcondor"]: + # HTCondor branch (kept for completeness – you can delete if you don't use it) + sub_job = submit_scripts["local"]["crest"] + format_params = { + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + "memory": int(SERVERS["local"].get("memory", 32.0) * 1024), + } + sub_job = sub_job.format(**format_params) + + with open( + os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w" + ) as f: + f.write(sub_job) + + crest_job = submit_scripts["local"]["crest_job"] + crest_job = crest_job.format( + path=path, + activation_line=activation_line, + commands=command, + ) + + with open(os.path.join(path, "job.sh"), "w") as f: + f.write(crest_job) + os.chmod(os.path.join(path, "job.sh"), 0o777) + + # Pre-create out/err for any status checkers that expect them + for fname in ("out.txt", "err.txt"): + fpath = os.path.join(path, fname) + if not os.path.exists(fpath): + with open(fpath, "w") as f: + f.write("") + os.chmod(fpath, 0o777) + + elif cluster_soft == "pbs": + # PBS branch that matches your 'crest' template above + sub_job = submit_scripts["local"]["crest"] + format_params = { + "queue": SERVERS["local"].get("queue", "alon_q"), + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + # 'memory' is in GB for the template: mem={memory}gb + "memory": int( + SERVERS["local"].get("memory", 32) + if SERVERS["local"].get("memory", 32) < 60 + else 40 + ), + "activation_line": activation_line, + "commands": command, + } + sub_job = sub_job.format(**format_params) + + submit_filename = settings["submit_filenames"]["PBS"] # usually 'submit.sh' + submit_path = os.path.join(path, submit_filename) + with open(submit_path, "w") as f: + f.write(sub_job) + os.chmod(submit_path, 0o755) + + else: + raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}") + + return path + + +def submit_crest_jobs(crest_paths: List[str]) -> None: + """ + Submit CREST jobs to the server. + + Args: + crest_paths (List[str]): List of paths to the CREST directories. + + Returns: + dict: A dictionary containing job IDs as keys and their statuses as values. + """ + crest_jobs = {} + for crest_path in crest_paths: + job_status, job_id = submit_job(path=crest_path) + logger.info(f"CREST job {job_id} submitted for {crest_path}") + crest_jobs[job_id] = {"path": crest_path, "status": job_status} + return crest_jobs + + +def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None: + """ + Monitor CREST jobs until they are complete. + + Args: + crest_jobs (dict): Dictionary containing job information (job ID, path, and status). + check_interval (int): Time interval (in seconds) to wait between status checks. 
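+
+    Note that the loop sleeps for min(check_interval, 100) seconds between
+    polls, so the effective polling interval is capped at 100 seconds.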
+ """ + while True: + all_done = True + for job_id, job_info in crest_jobs.items(): + if job_info["status"] not in ["done", "failed"]: + try: + job_info["status"] = check_job_status(job_id) # Update job status + except Exception as e: + logger.error(f"Error checking job status for job {job_id}: {e}") + job_info["status"] = "failed" + if job_info["status"] not in ["done", "failed"]: + all_done = False + if all_done: + break + time.sleep(min(check_interval, 100)) + + +def process_completed_jobs(crest_jobs: dict) -> list: + """ + Process the completed CREST jobs and update XYZ guesses. + + Args: + crest_jobs (dict): Dictionary containing job information. + xyz_guesses (list): List to store the resulting XYZ guesses. + """ + xyz_guesses = [] + for job_id, job_info in crest_jobs.items(): + crest_path = job_info["path"] + if job_info["status"] == "done": + crest_best_path = os.path.join(crest_path, "crest_best.xyz") + if os.path.exists(crest_best_path): + with open(crest_best_path, "r") as f: + content = f.read() + xyz_guess = str_to_xyz(content) + xyz_guesses.append(xyz_guess) + else: + logger.error(f"crest_best.xyz not found in {crest_path}") + elif job_info["status"] == "failed": + logger.error(f"CREST job failed for {crest_path}") + return xyz_guesses -register_job_adapter('heuristics', HeuristicsAdapter) +def extract_digits(s: str) -> int: + """ + Extract the first integer from a string + + Args: + s (str): The string to extract the integer from + + Returns: + int: The first integer in the string + + """ + return int(re.sub(r"[^\d]", "", s)) + + +def convert_xyz_to_df(xyz: dict) -> pd.DataFrame: + """ + Convert a dictionary of xyz coords to a pandas DataFrame with bond distances + + Args: + xyz (dict): The xyz coordinates of the molecule + + Return: + pd.DataFrame: The xyz coordinates as a pandas DataFrame + + """ + symbols = xyz["symbols"] + symbol_enum = [f"{symbol}{i}" for i, symbol in enumerate(symbols)] + ts_dmat = xyz_to_dmat(xyz) + + return pd.DataFrame(ts_dmat, columns=symbol_enum, index=symbol_enum) + + +def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: + """ + Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it + + Args: + dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols + + Returns: + dict: The hydrogen atom and the two heavy atoms. 
The keys are 'H', 'A', 'B' + """ + + closest_atoms = {} + for index, row in dataframe.iterrows(): + + row[index] = np.inf + closest = row.nsmallest(2).index.tolist() + closest_atoms[index] = closest + + hydrogen_keys = [key for key in dataframe.index if key.startswith("H")] + condition_occurrences = [] + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + is_heavy_present = any( + atom for atom in closest_atoms if not atom.startswith("H") + ) + if_hydrogen_present = any( + atom + for atom in closest_atoms + if atom.startswith("H") and atom != hydrogen_key + ) + + if is_heavy_present and if_hydrogen_present: + # Store the details of this occurrence + condition_occurrences.append( + {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]} + ) + + # Check if the condition was met + if condition_occurrences: + if len(condition_occurrences) > 1: + # Store distances to decide which occurrence to use + occurrence_distances = [] + for occurrence in condition_occurrences: + # Calculate the sum of distances to the two heavy atoms + hydrogen_key = f"{occurrence['H']}" + heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"] + try: + distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() + occurrence_distances.append((occurrence, distances)) + except KeyError as e: + print(f"Error accessing distances for occurrence {occurrence}: {e}") + + # Select the occurrence with the smallest distance + best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] + return { + "H": extract_digits(best_occurrence["H"]), + "A": extract_digits(best_occurrence["A"]), + "B": extract_digits(best_occurrence["B"]), + } + else: + + # Check the all the hydrogen atoms, and see the closest two heavy atoms and aggregate their distances to determine which Hyodrogen atom has the lowest distance aggregate + min_distance = np.inf + selected_hydrogen = None + selected_heavy_atoms = None + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")] + + if len(heavy_atoms) < 2: + continue + + distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum() + if distances < min_distance: + min_distance = distances + selected_hydrogen = hydrogen_key + selected_heavy_atoms = heavy_atoms + + if selected_hydrogen: + return { + "H": extract_digits(selected_hydrogen), + "A": extract_digits(selected_heavy_atoms[0]), + "B": extract_digits(selected_heavy_atoms[1]), + } + else: + raise ValueError("No valid hydrogen atom found.") + + +register_job_adapter("heuristics", HeuristicsAdapter) \ No newline at end of file diff --git a/arc/settings/settings.py b/arc/settings/settings.py index 8be7b76cd7..5315fa7598 100644 --- a/arc/settings/settings.py +++ b/arc/settings/settings.py @@ -8,6 +8,7 @@ import os import string import sys +import shutil # Users should update the following server dictionary. 
# Instructions for RSA key generation can be found here: @@ -369,3 +370,139 @@ def find_executable(env_name, executable_name='python'): if path and os.path.isdir(path): RMG_DB_PATH = path break + + +crest_path1 = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(sys.executable))), + 'crest_env', 'bin', 'crest') +crest_path2 = os.path.join(home, 'anaconda3', 'envs', 'crest_env', 'bin', 'crest') +crest_path3 = os.path.join(home, 'miniconda3', 'envs', 'crest_env', 'bin', 'crest') +crest_path4 = os.path.join(home, '.conda', 'envs', 'crest_env', 'bin', 'crest') +crest_path5 = os.path.join('/Local/ce_dana', 'anaconda3', 'envs', 'crest_env', 'bin', 'crest') +crest_path6 = os.path.join(home, 'mambaforge', 'envs', 'crest_env', 'bin', 'crest') +crest_path7 = os.path.join(home, 'micromamba', 'envs', 'crest_env', 'bin', 'crest') +# Binary path for CREST + + +for crest_path in [crest_path1, crest_path2, crest_path3, crest_path4, crest_path5, crest_path6, crest_path7]: + if os.path.isfile(crest_path): + CREST_PATH = crest_path + # check if using micromamba, mambaforge, anaconda3, miniconda3, or .conda + if 'micromamba' in crest_path: + # CREST_ENV_PATH = "source ~/micromamba/etc/profile.d/micromamba.sh && micromamba activate crest_env" + CREST_ENV_PATH = "source ~/.bashrc && micromamba activate crest_env" + elif 'mambaforge' in crest_path: + CREST_ENV_PATH = "source ~/.bashrc && mamba activate crest_env" + elif 'anaconda3' in crest_path: + CREST_ENV_PATH = "source ~/.bashrc && conda activate crest_env" + elif 'miniconda3' in crest_path: + CREST_ENV_PATH = "source ~/.bashrc && conda activate crest_env" + elif '.conda' in crest_path: + CREST_ENV_PATH = "source ~/.bashrc && conda activate crest_env" + break +# If the path (environment) does not exist, then we use the binary + +def parse_version(folder_name): + """ + Parses the version from the folder name and returns a tuple for comparison. + Supports versions like: 3.0.2, v212, 2.1, 2 + """ + version_regex = re.compile(r"(?:v?(\d+)(?:\.(\d+))?(?:\.(\d+))?)", re.IGNORECASE) + match = version_regex.search(folder_name) + if not match: + return (0, 0, 0) + + major = int(match.group(1)) if match.group(1) else 0 + minor = int(match.group(2)) if match.group(2) else 0 + patch = int(match.group(3)) if match.group(3) else 0 + + # Example: v212 → (2, 1, 2) + if major >= 100 and match.group(2) is None and match.group(3) is None: + s = str(major).rjust(3, "0") + major = int(s[0]) + minor = int(s[1]) + patch = int(s[2]) + + return (major, minor, patch) + + +def find_highest_version_in_directory(directory, name_contains): + """ + Finds the file with the highest version in a directory containing a specific string. 
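+
+    Folder names are ranked with parse_version() above, e.g.:
+        parse_version('crest-3.0.2') -> (3, 0, 2)
+        parse_version('crest_v212')  -> (2, 1, 2)
+        parse_version('crest2.1')    -> (2, 1, 0)
+        parse_version('crest')       -> (0, 0, 0)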
+ """ + if not os.path.exists(directory): + return None + + highest_version_path = None + highest_version = () + + for folder in os.listdir(directory): + file_path = os.path.join(directory, folder) + if name_contains.lower() in folder.lower() and os.path.isdir(file_path): + crest_path = os.path.join(file_path, "crest") + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + version = parse_version(folder) + if highest_version == () or version > highest_version: + highest_version = version + highest_version_path = crest_path + return highest_version_path + + +def find_crest_executable(): + """ + Returns (crest_path, env_cmd): + + - crest_path: full path to 'crest' + - env_cmd: shell snippet to activate its environment (may be "") + """ + # Priority 1: /Local/ce_dana standalone builds + crest_path = find_highest_version_in_directory("/Local/ce_dana", "crest") + if crest_path and os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # Standalone binary: no env activation needed + return crest_path, "" + + # Priority 2: Conda/Mamba/Micromamba envs + home = os.path.expanduser("~") + potential_env_paths = [ + os.path.join(home, "anaconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniforge3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, ".conda", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "mambaforge", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "micromamba", "envs", "crest_env", "bin", "crest"), + ] + + # Also check the current env's bin + current_env_bin = os.path.dirname(sys.executable) + potential_env_paths.insert(0, os.path.join(current_env_bin, "crest")) + + for crest_path in potential_env_paths: + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # env_root = .../anaconda3 or .../miniforge3 or .../mambaforge etc. + env_root = crest_path.split("/envs/crest_env/")[0] + if "micromamba" in crest_path: + env_cmd = ( + f"source {env_root}/etc/profile.d/micromamba.sh && " + f"micromamba activate crest_env" + ) + elif any( + name in env_root + for name in ("anaconda3", "miniconda3", "miniforge3", "mambaforge", ".conda") + ): + env_cmd = ( + f"source {env_root}/etc/profile.d/conda.sh && " + f"conda activate crest_env" + ) + else: + # If for some reason it's just a random prefix with crest in bin + env_cmd = "" + return crest_path, env_cmd + + # Priority 3: PATH + crest_in_path = shutil.which("crest") + if crest_in_path: + return crest_in_path, "" + + return None, None + + +CREST_PATH, CREST_ENV_PATH = find_crest_executable() diff --git a/arc/species/converter.py b/arc/species/converter.py index b8ec5e7d06..8489d8b317 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -44,6 +44,90 @@ ob.obErrorLog.SetOutputLevel(0) logger = get_logger() +def str_to_str(xyz_str: str, + reverse_atoms: bool = False, + units: str = 'angstrom', + convert_to: str = 'angstrom', + project_directory: Optional[str] = None + ) -> str: + """ + Convert a string xyz format from `ATOM X Y Z` to `X Y Z ATOM`. Also, can convert units from `Angstrom` to `Bohr` and vice versa. + + Args: + xyz_str (str): The string xyz format to be converted. + reverse_atoms (bool, optional): Whether to reverse the atoms and coordinates. + convert_to (str, optional): The units to convert to (either 'angstrom' or 'bohr'). + project_directory (str, optional): The path to the project directory. 
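+        units (str, optional): The units of the input coordinates, 'angstrom' (default) or 'bohr'.
+
+    Example:
+        str_to_str('O 0.0 0.0 0.0', reverse_atoms=True) returns '0.0 0.0 0.0 O'.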
+ + Raises: + ConverterError: If xyz_str is not a string or does not have four space-separated entries per non-empty line. + + Returns: str + The converted string xyz format. + """ + if isinstance(xyz_str, tuple): + xyz_str = '\n'.join(xyz_str) + if isinstance(xyz_str, list): + xyz_str = '\n'.join(xyz_str) + if not isinstance(xyz_str, str): + raise ConverterError(f'Expected a string input, got {type(xyz_str)}') + if project_directory is not None and os.path.isfile(os.path.join(project_directory, xyz_str)): + xyz_str = os.path.join(project_directory, xyz_str) + + + BOHR_TO_ANGSTROM = 0.529177 + ANGSTROM_TO_BOHR = 1.8897259886 + + if units.lower() == 'angstrom' and convert_to.lower() == 'angstrom': + conversion_factor = 1 + elif units.lower() == 'bohr' and convert_to.lower() == 'bohr': + conversion_factor = 1 + elif units.lower() == 'angstrom' and convert_to.lower() == 'bohr': + conversion_factor = ANGSTROM_TO_BOHR + elif units.lower() == 'bohr' and convert_to.lower() == 'angstrom': + conversion_factor = BOHR_TO_ANGSTROM + else: + raise ConverterError("Invalid target unit. Choose 'angstrom' or 'bohr'.") + + processed_lines = list() + # Split the string into lines + lxyz = xyz_str.strip().split() + + atom_first = False if is_str_float(lxyz[0]) else True + lxyz = xyz_str.strip().splitlines() + + for idx, item in enumerate(lxyz): + parts = item.strip().split() + + if len(parts) != 4: + raise ConverterError(f'xyz_str has an incorrect format, expected 4 elements in each line, ' + f'got "{item}" in:\n{xyz_str}') + if atom_first: + atom, x_str, y_str, z_str = parts + else: + x_str, y_str, z_str, atom = parts + + try: + x = float(x_str) * conversion_factor + y = float(y_str) * conversion_factor + z = float(z_str) * conversion_factor + + except ValueError as e: + raise ConverterError(f'Could not convert {x_str}, {y_str}, or {z_str} to floats.') from e + + if reverse_atoms and atom_first: + formatted_line = f'{x} {y} {z} {atom}' + elif reverse_atoms and not atom_first: + formatted_line = f'{atom} {x} {y} {z}' + elif not reverse_atoms and atom_first: + formatted_line = f'{atom} {x} {y} {z}' + elif not reverse_atoms and not atom_first: + formatted_line = f'{x} {y} {z} {atom}' + + processed_lines.append(formatted_line) + + return '\n'.join(processed_lines) + def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None, diff --git a/arc/species/species.py b/arc/species/species.py index 882060b221..eebcf048e8 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1525,6 +1525,7 @@ def cluster_tsgs(self): for tsg in self.ts_guesses: for cluster_tsg in cluster_tsgs: if cluster_tsg.almost_equal_tsgs(tsg): + logger.info(f"Similar TSGuesses found: {tsg.index} is similar to {cluster_tsg.index}") cluster_tsg.cluster.append(tsg.index) if tsg.method not in cluster_tsg.method: cluster_tsg.method += f' + {tsg.method}' diff --git a/arc/statmech/arkane.py b/arc/statmech/arkane.py index 581676e25e..75bab9a96f 100644 --- a/arc/statmech/arkane.py +++ b/arc/statmech/arkane.py @@ -671,6 +671,141 @@ def _section_contains_key(file_path: str, section_start: str, section_end: str, return False +def _normalize_method(method: str) -> str: + """ + Normalize method names for comparison: + - lowercase + - remove all hyphens + + Examples: + "DLPNO-CCSD(T)-F12" -> "dlpnoccsd(t)f12" + "dlpnoccsd(t)f122023" -> "dlpnoccsd(t)f122023" + """ + return method.lower().replace('-', '') + + +def _split_method_year(method_norm: str): + """ + Split a normalized method into (base_method, year). 
+ + Examples: + "dlpnoccsd(t)f122023" -> ("dlpnoccsd(t)f12", 2023) + "dlpnoccsd(t)f12" -> ("dlpnoccsd(t)f12", None) + """ + m = re.match(r"^(.*?)(\d{4})$", method_norm) + if not m: + return method_norm, None + base, year_str = m.groups() + return base, int(year_str) + + +def _normalize_basis(basis: Optional[str]) -> Optional[str]: + """ + Normalize basis names for comparison: + - lowercase + - remove hyphens and spaces + + Examples: + "cc-pVTZ-F12" -> "ccpvtzf12" + "ccpvtzf12" -> "ccpvtzf12" + """ + if basis is None: + return None + return basis.replace('-', '').replace(' ', '').lower() + + +def _parse_lot_params(lot_str: str) -> dict: + """ + Parse method, basis, and software from a LevelOfTheory(...) string. + + Example lot_str: + "LevelOfTheory(method='dlpnoccsd(t)f122023',basis='ccpvtzf12',software='orca')" + """ + params = {'method': None, 'basis': None, 'software': None} + for key in params.keys(): + m = re.search(rf"{key}='([^']+)'", lot_str) + if m: + params[key] = m.group(1) + return params + + +def _iter_level_keys_from_section(file_path: str, + section_start: str, + section_end: str) -> list[str]: + """ + Return all LevelOfTheory(...) key strings that appear as dictionary keys + in a given section of data.py. + + These look like: + "LevelOfTheory(method='...',basis='...',software='...')" : { ... } + """ + section = _extract_section(file_path, section_start, section_end) + if section is None: + return [] + + # Match things like: "LevelOfTheory(...)" : { ... } + pattern = r'"(LevelOfTheory\([^"]*\))"\s*:' + return re.findall(pattern, section, flags=re.DOTALL) + + +def _find_best_level_key_for_sp_level(level: "Level", + file_path: str, + section_start: str, + section_end: str) -> Optional[str]: + """ + Given an ARC Level and a data.py section, find the LevelOfTheory(...) key string + that best matches the level's method/basis, allowing: + - hyphen-insensitive comparison + - an optional 4-digit year suffix in Arkane's method + and choose the *latest* year among matching entries. + """ + if level is None or level.method is None: + return None + + target_method_norm = _normalize_method(level.method) + target_base, _ = _split_method_year(target_method_norm) + target_basis_norm = _normalize_basis(level.basis) + target_software = level.software.lower() if level.software else None + + best_key = None + best_year = -1 + + for lot_str in _iter_level_keys_from_section(file_path, section_start, section_end): + params = _parse_lot_params(lot_str) + cand_method = params.get('method') + cand_basis = params.get('basis') + cand_sw = params.get('software') + + if cand_method is None: + continue + + cand_method_norm = _normalize_method(cand_method) + cand_base, cand_year = _split_method_year(cand_method_norm) + + # method base must match + if cand_base != target_base: + continue + + # basis must match (normalized), if we have one + if target_basis_norm is not None: + cand_basis_norm = _normalize_basis(cand_basis) + if cand_basis_norm != target_basis_norm: + continue + + # if user specified software, prefer matching software; + # but don't *require* it to exist in data.py + if target_software is not None and cand_sw is not None: + if cand_sw.lower() != target_software: + continue + + year_val = cand_year if cand_year is not None else 0 + if year_val > best_year: + best_year = year_val + best_key = lot_str + + return best_key + + def _level_to_str(level: 'Level') -> str: """ Convert Level to Arkane's LevelOfTheory string representation. 
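For reference, a minimal standalone sketch of the matching rules the helpers above implement (the two functions are copied from this hunk; the assertions are illustrative):

```python
import re

def _normalize_method(method: str) -> str:
    # Lowercase and drop hyphens, as in the helper above.
    return method.lower().replace('-', '')

def _split_method_year(method_norm: str):
    # Split a trailing 4-digit year off a normalized method name.
    m = re.match(r"^(.*?)(\d{4})$", method_norm)
    if not m:
        return method_norm, None
    base, year_str = m.groups()
    return base, int(year_str)

# Hyphens are ignored when comparing method names:
assert _normalize_method('DLPNO-CCSD(T)-F12') == 'dlpnoccsd(t)f12'
# A trailing 4-digit year is split off, so Arkane's 'dlpnoccsd(t)f122023' key
# matches an ARC level whose method is 'DLPNO-CCSD(T)-F12':
assert _split_method_year('dlpnoccsd(t)f122023') == ('dlpnoccsd(t)f12', 2023)
assert _split_method_year('dlpnoccsd(t)f12') == ('dlpnoccsd(t)f12', None)
```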
@@ -696,15 +831,16 @@ def get_arkane_model_chemistry(sp_level: 'Level', """ Get Arkane model chemistry string with database validation. - Args: - sp_level (Level): Level of theory for energy. - freq_level (Optional[Level]): Level of theory for frequencies. - freq_scale_factor (Optional[float]): Frequency scaling factor. + Reads RMG's quantum_corrections/data.py as plain text, searches for + LevelOfTheory(...) keys, and matches: + - method: ignoring hyphens and optional 4-digit year suffix + - basis: ignoring hyphens and spaces - Returns: - Optional[str]: Arkane-compatible model chemistry string. + If multiple entries only differ by year, the one with the *latest* year + is chosen (year=0 if no year in that entry). """ if sp_level.method_type == 'composite': + # Composite Gaussian methods: no basis / year complications here return f"LevelOfTheory(method='{sp_level.method}',software='gaussian')" qm_corr_file = os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py') @@ -714,40 +850,40 @@ def get_arkane_model_chemistry(sp_level: 'Level', freq_dict_start = "freq_dict = {" freq_dict_end = "}" - sp_repr = _level_to_str(sp_level) - quoted_sp_repr = f'"{sp_repr}"' - + # ---- Case 1: User supplied explicit frequency scale factor ---- + # We only need an energy level (AEC entry in atom_energies) if freq_scale_factor is not None: - found = _section_contains_key(file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr) - if not found: + best_energy = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, atom_energies_start, atom_energies_end + ) + if best_energy is None: + # No matching AEC level in Arkane DB return None - return sp_repr + # modelChemistry = LevelOfTheory(...) + return best_energy + # ---- Case 2: CompositeLevelOfTheory (separate freq and energy levels) ---- if freq_level is None: raise ValueError("freq_level required when freq_scale_factor isn't provided") - freq_repr = _level_to_str(freq_level) - quoted_freq_repr = f'"{freq_repr}"' - - found_sp = _section_contains_key(file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr) - found_freq = _section_contains_key(file_path=qm_corr_file, - section_start=freq_dict_start, - section_end=freq_dict_end, - target=quoted_freq_repr) + best_energy = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, atom_energies_start, atom_energies_end + ) + best_freq = _find_best_level_key_for_sp_level( + freq_level, qm_corr_file, freq_dict_start, freq_dict_end + ) - if not found_sp or not found_freq: + if best_energy is None or best_freq is None: + # If either is missing, cannot construct a valid composite model chemistry return None - return (f"CompositeLevelOfTheory(\n" - f" freq={freq_repr},\n" - f" energy={sp_repr}\n" - f")") + # These strings are LevelOfTheory(...) expressions usable directly in Arkane input + return ( + "CompositeLevelOfTheory(\n" + f" freq={best_freq},\n" + f" energy={best_energy}\n" + ")" + ) def check_arkane_bacs(sp_level: 'Level', @@ -757,13 +893,11 @@ def check_arkane_bacs(sp_level: 'Level', """ Check that Arkane has AECs and BACs for the given sp level of theory. - Args: - sp_level (Level): Level of theory for energy. - bac_type (str): Type of bond additivity correction ('p' for Petersson, 'm' for Melius) - raise_error (bool): Whether to raise an error if AECs or BACs are missing. - - Returns: - bool: True if both AECs and BACs are available, False otherwise. 
+ Uses plain-text parsing of quantum_corrections/data.py, matching LevelOfTheory + keys by: + - method base (ignore hyphens + optional year) + - basis (normalized) + and picking the latest year where multiple exist. """ qm_corr_file = os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py') @@ -776,24 +910,25 @@ def check_arkane_bacs(sp_level: 'Level', bac_section_start = "pbac = {" bac_section_end = "mbac = {" - sp_repr = _level_to_str(sp_level) - quoted_sp_repr = f'"{sp_repr}"' - - has_aec = _section_contains_key( - file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr, + best_aec_key = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, atom_energies_start, atom_energies_end ) - has_bac = _section_contains_key( - file_path=qm_corr_file, - section_start=bac_section_start, - section_end=bac_section_end, - target=quoted_sp_repr, + best_bac_key = _find_best_level_key_for_sp_level( + sp_level, qm_corr_file, bac_section_start, bac_section_end ) + + has_aec = best_aec_key is not None + has_bac = best_bac_key is not None has_encorr = bool(has_aec and has_bac) + + # For logging, prefer the matched key; fall back to the naive LevelOfTheory string + repr_level = best_aec_key if best_aec_key is not None else _level_to_str(sp_level) + if not has_encorr: - mssg = f"Arkane does not have the required energy corrections for {sp_repr} (AEC: {has_aec}, BAC: {has_bac})" + mssg = ( + f"Arkane does not have the required energy corrections for {repr_level} " + f"(AEC: {has_aec}, BAC: {has_bac})" + ) if raise_error: raise ValueError(mssg) else: @@ -801,6 +936,7 @@ def check_arkane_bacs(sp_level: 'Level', return has_encorr + def parse_species_thermo(species, output_content: str) -> None: """Parse thermodynamic data for a single species.""" # Parse E0 From 734db3e8a1c238174ca49b21229ca150174119d9 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Thu, 27 Nov 2025 23:19:00 +0200 Subject: [PATCH 2/9] Crest Adapter --- AGENTS.md | 38 +++ arc/job/adapters/ts/crest.py | 369 +++++++++++++++++++++++++ arc/job/adapters/ts/heuristics.py | 439 ++++-------------------------- arc/species/converter.py | 23 +- arc/species/converter_test.py | 19 ++ 5 files changed, 496 insertions(+), 392 deletions(-) create mode 100644 AGENTS.md create mode 100644 arc/job/adapters/ts/crest.py diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..65a9b8337e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,38 @@ +# Repository Guidelines + +## Project Structure & Module Organization +- Core package lives in `arc/` (chemistry logic, schedulers, plotting utilities). Tests for these modules sit alongside as `*_test.py`. +- Higher-level integration checks live in `functional/`. Examples and reproducible inputs are in `examples/` and `data/`. +- Developer scripts are under `devtools/`; docs sources live in `docs/`; the CLI entry point is `ARC.py`. +- Build helpers such as the `Makefile` and `utilities.py` are at the repo root; Docker assets are in `dockerfiles/` and `docker-compose.yml`. + +## Build, Test, and Development Commands +- `python -m pip install -e .` — editable install for local development. +- `make compile` — build the Cython extension (`arc.molecule`) in-place after dependency setup. +- `make test` — run unit tests with coverage over `arc/`. +- `make test-functional` — run functional/integration tests in `functional/`. +- `make test-all` — run both suites with coverage; default report is `coverage.xml`. 
+- `make clean` — remove build artifacts; `make check-env` prints Python path/version for debugging. + +## Coding Style & Naming Conventions +- Follow PEP 8: 4-space indentation, readable line wraps (~100 cols), and descriptive variable names (species, reactions, jobs). +- Prefer f-strings for logging and user messages; keep logging via the shared `logger` in `arc.common`. +- Tests use `pytest` discovery; name files `*_test.py` and functions `test_*` near the code they cover. +- Keep modules import-safe to avoid circular deps (e.g., `arc/common.py` avoids importing other ARC modules). + +## Testing Guidelines +- When running tests or any code, you must activate the conda environment called arc_env +- Primary framework: `pytest` (see `Makefile` targets above). Use `-ra -vv` locally when chasing failures. +- Add unit tests under `arc/` for module-level behavior and functional tests under `functional/` for end-to-end job flows. +- Aim to maintain existing coverage; `make test-all` produces `coverage.xml` for CI/codecov. +- Record any required external programs (quantum chemistry engines, schedulers) in test docstrings or skip markers. + +## Commit & Pull Request Guidelines +- Commit messages should be concise and action-oriented (e.g., `Fix species thermo parsing`, `Improve scheduler resubmission logging`). +- Squash noisy WIP commits before raising a PR; keep each commit logically scoped (feature, fix, or refactor). +- PRs should include: a brief summary, linked issues, test results (`make test`/`make test-functional` output), and notes on external requirements (e.g., queue systems, ORCA/Gaussian availability). +- Add screenshots or sample log excerpts when changing plotting, logging, or job orchestration behavior to aid reviewers. + +## Environment & Configuration Tips +- Use the provided `environment.yml` or `requirements.txt` to align dependencies; some features rely on external quantum chemistry backends configured via `arc/settings`. +- Before running remote jobs, verify scheduler and credentials in your local settings, and prefer `make check-env` to confirm Python tooling paths. diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py new file mode 100644 index 0000000000..7e8a5a0fc3 --- /dev/null +++ b/arc/job/adapters/ts/crest.py @@ -0,0 +1,369 @@ +""" +Utilities for running CREST within ARC. + +Separated from heuristics so CREST can be conditionally imported and reused. +""" + +import os +import re +import time +from typing import List + +import numpy as np +import pandas as pd + +from arc.common import get_logger +from arc.imports import settings, submit_scripts +from arc.job.local import check_job_status, submit_job +from arc.species.converter import reorder_xyz_string, str_to_xyz, xyz_to_dmat, xyz_to_str + +logger = get_logger() + +try: + CREST_PATH = settings["CREST_PATH"] + CREST_ENV_PATH = settings["CREST_ENV_PATH"] + SERVERS = settings["servers"] +except KeyError: + CREST_PATH = None + CREST_ENV_PATH = None + SERVERS = {} + + +def crest_available() -> bool: + """ + Return whether CREST is configured for use. 
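+
+    CREST is considered available when a 'local' server is configured in
+    settings and either a CREST binary path or an environment activation
+    command was resolved at import time.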
+ """ + return bool(SERVERS.get("local")) and bool(CREST_PATH or CREST_ENV_PATH) + + +def crest_ts_conformer_search( + xyz_guess: dict, + a_atom: int, + h_atom: int, + b_atom: int, + path: str = "", + xyz_crest_int: int = 0, +) -> str: + """ + Prepare a CREST TS conformer search job: + - Write coords.ref and constraints.inp + - Write a PBS/HTCondor submit script using submit_scripts["local"]["crest"] + - Return the CREST job directory path + """ + path = os.path.join(path, f"crest_{xyz_crest_int}") + os.makedirs(path, exist_ok=True) + + # --- coords.ref --- + symbols = xyz_guess["symbols"] + converted_coords = reorder_xyz_string( + xyz_str=xyz_to_str(xyz_guess), + reverse_atoms=True, + convert_to="bohr", + ) + coords_ref_content = f"$coord\n{converted_coords}\n$end\n" + coords_ref_path = os.path.join(path, "coords.ref") + with open(coords_ref_path, "w") as f: + f.write(coords_ref_content) + + # --- constraints.inp --- + num_atoms = len(symbols) + # CREST uses 1-based indices + a_atom += 1 + h_atom += 1 + b_atom += 1 + + # All atoms not directly involved in A–H–B go into the metadynamics atom list + list_of_atoms_numbers_not_participating_in_reaction = [ + i for i in range(1, num_atoms + 1) if i not in [a_atom, h_atom, b_atom] + ] + + constraints_path = os.path.join(path, "constraints.inp") + with open(constraints_path, "w") as f: + f.write("$constrain\n") + f.write(f" atoms: {a_atom}, {h_atom}, {b_atom}\n") + f.write(" force constant: 0.5\n") + f.write(" reference=coords.ref\n") + f.write(f" distance: {a_atom}, {h_atom}, auto\n") + f.write(f" distance: {h_atom}, {b_atom}, auto\n") + f.write("$metadyn\n") + if list_of_atoms_numbers_not_participating_in_reaction: + f.write( + f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n' + ) + f.write("$end\n") + + # --- build CREST command string --- + # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8 + local_server = SERVERS.get("local", {}) + cpus = int(local_server.get("cpus", 8)) + if CREST_ENV_PATH: + crest_exe = "crest" + else: + crest_exe = CREST_PATH if CREST_PATH is not None else "crest" + + commands = [ + crest_exe, + "coords.ref", + "--cinp constraints.inp", + "--noreftopo", + f'-T {local_server.get("cpus", 8)}', + ] + command = " ".join(commands) + + # --- activation line (optional) --- + activation_line = CREST_ENV_PATH or "" + + if SERVERS.get("local") is not None: + cluster_soft = SERVERS["local"]["cluster_soft"].lower() + + if cluster_soft in ["condor", "htcondor"]: + # HTCondor branch (kept for completeness – you can delete if you don't use it) + sub_job = submit_scripts["local"]["crest"] + format_params = { + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + "memory": int(SERVERS["local"].get("memory", 32.0) * 1024), + } + sub_job = sub_job.format(**format_params) + + with open( + os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w" + ) as f: + f.write(sub_job) + + crest_job = submit_scripts["local"]["crest_job"] + crest_job = crest_job.format( + path=path, + activation_line=activation_line, + commands=command, + ) + + with open(os.path.join(path, "job.sh"), "w") as f: + f.write(crest_job) + os.chmod(os.path.join(path, "job.sh"), 0o777) + + # Pre-create out/err for any status checkers that expect them + for fname in ("out.txt", "err.txt"): + fpath = os.path.join(path, fname) + if not os.path.exists(fpath): + with open(fpath, "w") as f: + f.write("") + os.chmod(fpath, 0o777) + + elif cluster_soft == "pbs": + # PBS branch that matches your 'crest' template above + 
sub_job = submit_scripts["local"]["crest"] + format_params = { + "queue": SERVERS["local"].get("queue", "alon_q"), + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + # 'memory' is in GB for the template: mem={memory}gb + "memory": int( + SERVERS["local"].get("memory", 32) + if SERVERS["local"].get("memory", 32) < 60 + else 40 + ), + "activation_line": activation_line, + "commands": command, + } + sub_job = sub_job.format(**format_params) + + submit_filename = settings["submit_filenames"]["PBS"] # usually 'submit.sh' + submit_path = os.path.join(path, submit_filename) + with open(submit_path, "w") as f: + f.write(sub_job) + os.chmod(submit_path, 0o755) + + else: + raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}") + + return path + + +def submit_crest_jobs(crest_paths: List[str]) -> dict: + """ + Submit CREST jobs to the server. + + Args: + crest_paths (List[str]): List of paths to the CREST directories. + + Returns: + dict: A dictionary containing job IDs as keys and their statuses as values. + """ + crest_jobs = {} + for crest_path in crest_paths: + job_status, job_id = submit_job(path=crest_path) + logger.info(f"CREST job {job_id} submitted for {crest_path}") + crest_jobs[job_id] = {"path": crest_path, "status": job_status} + return crest_jobs + + +def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None: + """ + Monitor CREST jobs until they are complete. + + Args: + crest_jobs (dict): Dictionary containing job information (job ID, path, and status). + check_interval (int): Time interval (in seconds) to wait between status checks. + """ + while True: + all_done = True + for job_id, job_info in crest_jobs.items(): + if job_info["status"] not in ["done", "failed"]: + try: + job_info["status"] = check_job_status(job_id) # Update job status + except Exception as e: + logger.error(f"Error checking job status for job {job_id}: {e}") + job_info["status"] = "failed" + if job_info["status"] not in ["done", "failed"]: + all_done = False + if all_done: + break + time.sleep(min(check_interval, 100)) + + +def process_completed_jobs(crest_jobs: dict) -> list: + """ + Process the completed CREST jobs and update XYZ guesses. + + Args: + crest_jobs (dict): Dictionary containing job information. 
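+
+    Returns:
+        list: The xyz coordinate dictionaries parsed from each successful
+            job's crest_best.xyz file.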
+    """
+    xyz_guesses = []
+    for job_id, job_info in crest_jobs.items():
+        crest_path = job_info["path"]
+        if job_info["status"] == "done":
+            crest_best_path = os.path.join(crest_path, "crest_best.xyz")
+            if os.path.exists(crest_best_path):
+                with open(crest_best_path, "r") as f:
+                    content = f.read()
+                xyz_guess = str_to_xyz(content)
+                xyz_guesses.append(xyz_guess)
+            else:
+                logger.error(f"crest_best.xyz not found in {crest_path}")
+        elif job_info["status"] == "failed":
+            logger.error(f"CREST job failed for {crest_path}")
+
+    return xyz_guesses
+
+
+def extract_digits(s: str) -> int:
+    """
+    Strip all non-digit characters from a string and return the remaining
+    digits as a single integer (e.g. 'C12' -> 12).
+
+    Args:
+        s (str): The string to extract the digits from.
+
+    Returns:
+        int: The digits in the string, joined into one integer.
+    """
+    return int(re.sub(r"[^\d]", "", s))
+
+
+def convert_xyz_to_df(xyz: dict) -> pd.DataFrame:
+    """
+    Convert a dictionary of xyz coords to a pandas DataFrame with bond distances.
+
+    Args:
+        xyz (dict): The xyz coordinates of the molecule.
+
+    Returns:
+        pd.DataFrame: The distance matrix as a pandas DataFrame.
+    """
+    symbols = xyz["symbols"]
+    symbol_enum = [f"{symbol}{i}" for i, symbol in enumerate(symbols)]
+    ts_dmat = xyz_to_dmat(xyz)
+
+    return pd.DataFrame(ts_dmat, columns=symbol_enum, index=symbol_enum)
+
+
+def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict:
+    """
+    Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it.
+
+    Args:
+        dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols.
+
+    Returns:
+        dict: The hydrogen atom and the two heavy atoms. The keys are 'H', 'A', 'B'.
+    """
+    closest_atoms = {}
+    for index, row in dataframe.iterrows():
+        # Mask the self-distance so nsmallest() returns the two nearest neighbours.
+        row[index] = np.inf
+        closest = row.nsmallest(2).index.tolist()
+        closest_atoms[index] = closest
+
+    hydrogen_keys = [key for key in dataframe.index if key.startswith("H")]
+    condition_occurrences = []
+
+    for hydrogen_key in hydrogen_keys:
+        atom_neighbours = closest_atoms[hydrogen_key]
+        # Note: these checks scan every atom label in the molecule, not just
+        # this hydrogen's two nearest neighbours.
+        is_heavy_present = any(
+            atom for atom in closest_atoms if not atom.startswith("H")
+        )
+        if_hydrogen_present = any(
+            atom
+            for atom in closest_atoms
+            if atom.startswith("H") and atom != hydrogen_key
+        )
+
+        if is_heavy_present and if_hydrogen_present:
+            # Store the details of this occurrence
+            condition_occurrences.append(
+                {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]}
+            )
+
+    # Check if the condition was met
+    if condition_occurrences:
+        if len(condition_occurrences) > 1:
+            # Store distances to decide which occurrence to use
+            occurrence_distances = []
+            for occurrence in condition_occurrences:
+                # Calculate the sum of distances to the two heavy atoms
+                hydrogen_key = f"{occurrence['H']}"
+                heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"]
+                try:
+                    distances = dataframe.loc[hydrogen_key, heavy_atoms].sum()
+                    occurrence_distances.append((occurrence, distances))
+                except KeyError as e:
+                    logger.error(f"Error accessing distances for occurrence {occurrence}: {e}")
+
+            # Select the occurrence with the smallest distance
+            best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0]
+            return {
+                "H": extract_digits(best_occurrence["H"]),
+                "A": extract_digits(best_occurrence["A"]),
+                "B": extract_digits(best_occurrence["B"]),
+            }
+        else:
+            # Fall back to scanning all hydrogen atoms: take each one's two closest
+            # heavy atoms and pick the hydrogen with the smallest aggregated distance.
+            min_distance = np.inf
+            selected_hydrogen = None
+            selected_heavy_atoms = None
+
+            for
hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")] + + if len(heavy_atoms) < 2: + continue + + distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum() + if distances < min_distance: + min_distance = distances + selected_hydrogen = hydrogen_key + selected_heavy_atoms = heavy_atoms + + if selected_hydrogen: + return { + "H": extract_digits(selected_hydrogen), + "A": extract_digits(selected_heavy_atoms[0]), + "B": extract_digits(selected_heavy_atoms[1]), + } + else: + raise ValueError("No valid hydrogen atom found.") diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index 436e16b537..a97f0f0c36 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -16,28 +16,26 @@ """ import datetime import itertools -import math import os -import re -import subprocess -import time from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -import re -import numpy as np -import pandas as pd - -from arc.imports import settings, submit_scripts from arc.common import almost_equal_coords, get_logger, is_angle_linear, is_xyz_linear, key_by_val from arc.job.adapter import JobAdapter -from arc.job.local import check_job_status, submit_job from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family from arc.job.factory import register_job_adapter +from arc.job.adapters.ts.crest import ( + convert_xyz_to_df, + crest_available, + crest_ts_conformer_search, + get_h_abs_atoms, + monitor_crest_jobs, + process_completed_jobs, + submit_crest_jobs, +) from arc.plotter import save_geo from arc.species.converter import ( compare_zmats, relocate_zmat_dummy_atoms_to_the_end, - str_to_str, str_to_xyz, xyz_to_dmat, xyz_to_str, @@ -53,14 +51,6 @@ from arc.level import Level from arc.reaction import ARCReaction -try: - CREST_PATH = settings["CREST_PATH"] - CREST_ENV_PATH = settings["CREST_ENV_PATH"] - HAS_CREST = True - SERVERS = settings["servers"] -except KeyError: - HAS_CREST = False - DIHEDRAL_INCREMENT = 30 logger = get_logger() @@ -281,16 +271,20 @@ def execute_incore(self): xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment, path=self.local_path) tsg.tok() - for method_index, xyz in enumerate(xyzs): + for method_index, xyz_entry in enumerate(xyzs): + xyz = xyz_entry.get("xyz") + method_label = xyz_entry.get("method", "Heuristics") + if xyz is None: + continue unique = True for other_tsg in rxn.ts_species.ts_guesses: if almost_equal_coords(xyz, other_tsg.initial_xyz): - if 'heuristics' not in other_tsg.method.lower(): - other_tsg.method += ' and Heuristics' + if method_label.lower() not in other_tsg.method.lower(): + other_tsg.method += f' and {method_label}' unique = False break if unique: - ts_guess = TSGuess(method='Heuristics', + ts_guess = TSGuess(method=method_label, index=len(rxn.ts_species.ts_guesses), method_index=method_index, t0=tsg.t0, @@ -302,15 +296,19 @@ def execute_incore(self): rxn.ts_species.ts_guesses.append(ts_guess) save_geo(xyz=xyz, path=self.local_path, - filename=f'Heuristics_{method_index}', + filename=f'{method_label}_{method_index}', format_='xyz', - comment=f'Heuristics {method_index}, family: {rxn.family}', + comment=f'{method_label} {method_index}, family: {rxn.family}', ) if len(self.reactions) < 5: - successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'heuristics' in tsg.method]) + successes = [tsg for tsg in 
rxn.ts_species.ts_guesses if tsg.success] + heuristics_successes = len([tsg for tsg in successes if 'heuristics' in tsg.method.lower()]) + crest_successes = len([tsg for tsg in successes if 'crest' in tsg.method.lower()]) if successes: - logger.info(f'Heuristics successfully found {successes} TS guesses for {rxn.label}.') + logger.info(f'Heuristics successfully found {len(successes)} TS guesses for {rxn.label}.') + if crest_successes: + logger.info(f'CREST contributed {crest_successes} TS guesses for {rxn.label}.') else: logger.info(f'Heuristics did not find any successful TS guesses for {rxn.label}.') @@ -877,10 +875,12 @@ def h_abstraction(reaction: 'ARCReaction', dihedral_increment (int, optional): The dihedral increment to use for B-H-A-C and D-B-H-C dihedral scans. Returns: List[dict] - Entries are Cartesian coordinates of TS guesses for all reactions. + Entries hold Cartesian coordinates of TS guesses and the generating method label. """ xyz_guesses = list() crest_paths = list() + all_zmats = list() + use_crest = crest_available() dihedral_increment = dihedral_increment or DIHEDRAL_INCREMENT reactants_reversed, products_reversed = are_h_abs_wells_reversed(rxn=reaction, product_dict=reaction.product_dicts[0]) for product_dict in reaction.product_dicts: @@ -957,9 +957,10 @@ def h_abstraction(reaction: 'ARCReaction', else: # This TS is unique, and has no atom collisions. zmats.append(zmat_guess) - xyz_guesses.append(xyz_guess) - - if HAS_CREST: + all_zmats.append(zmat_guess) + xyz_guesses.append({"xyz": xyz_guess, "method": "Heuristics"}) + + if use_crest: xyz_guess_crest = xyz_guess.copy() if isinstance(xyz_guess_crest, dict): df_dmat = convert_xyz_to_df(xyz_guess_crest) @@ -970,373 +971,41 @@ def h_abstraction(reaction: 'ARCReaction', xyz_temp = "\n".join(xyz_guess_crest) xyz_to_dmat = str_to_xyz(xyz_temp) df_dmat = convert_xyz_to_df(xyz_to_dmat) + else: + df_dmat = None + + if df_dmat is not None: + try: + h_abs_atoms_dict = get_h_abs_atoms(df_dmat) + crest_path = crest_ts_conformer_search( + xyz_guess_crest, + h_abs_atoms_dict["A"], + h_abs_atoms_dict["H"], + h_abs_atoms_dict["B"], + path=path, + xyz_crest_int=iteration, + ) + crest_paths.append(crest_path) + except (ValueError, KeyError) as e: + logger.error(f"Could not determine the H abstraction atoms, got:\n{e}") - try: - h_abs_atoms_dict = get_h_abs_atoms(df_dmat) - crest_path = crest_ts_conformer_search( - xyz_guess_crest, - h_abs_atoms_dict["A"], - h_abs_atoms_dict["H"], - h_abs_atoms_dict["B"], - path=path, - xyz_crest_int=iteration, - ) - crest_paths.append(crest_path) - except (ValueError, KeyError) as e: - logger.error(f"Could not determine the H abstraction atoms, got:\n{e}") - - if crest_paths: + if use_crest and crest_paths: crest_jobs = submit_crest_jobs(crest_paths) monitor_crest_jobs(crest_jobs) # Keep checking job statuses until complete xyz_guesses_crest = process_completed_jobs(crest_jobs) for xyz_guess_crest in xyz_guesses_crest: zmat_guess = zmat_from_xyz(xyz_guess_crest, is_ts=True) is_unique = True # Assume the current Z-matrix is unique - for existing_zmat_guess in zmats: + for existing_zmat_guess in all_zmats: if compare_zmats(existing_zmat_guess, zmat_guess): is_unique = False # Found a match, mark as not unique break # Exit this inner loop only if is_unique: # If no match was found, append to lists - zmats.append(zmat_guess) - xyz_guesses.append(xyz_guess_crest) - else: - logger.error("No CREST paths found") - - return xyz_guesses - - -def crest_ts_conformer_search( - xyz_guess: dict, - a_atom: int, - 
h_atom: int, - b_atom: int, - path: str = "", - xyz_crest_int: int = 0, -) -> str: - """ - Prepare a CREST TS conformer search job: - - Write coords.ref and constraints.inp - - Write a PBS submit script using submit_scripts["local"]["crest"] - - Return the CREST job directory path - """ - path = os.path.join(path, f"crest_{xyz_crest_int}") - os.makedirs(path, exist_ok=True) - - # --- coords.ref --- - symbols = xyz_guess["symbols"] - converted_coords = str_to_str( - xyz_str=xyz_to_str(xyz_guess), - reverse_atoms=True, - convert_to="bohr", - ) - coords_ref_content = f"$coord\n{converted_coords}\n$end\n" - coords_ref_path = os.path.join(path, "coords.ref") - with open(coords_ref_path, "w") as f: - f.write(coords_ref_content) - - # --- constraints.inp --- - num_atoms = len(symbols) - # CREST uses 1-based indices - a_atom += 1 - h_atom += 1 - b_atom += 1 - - # All atoms not directly involved in A–H–B go into the metadynamics atom list - list_of_atoms_numbers_not_participating_in_reaction = [ - i for i in range(1, num_atoms + 1) if i not in [a_atom, h_atom, b_atom] - ] - - constraints_path = os.path.join(path, "constraints.inp") - with open(constraints_path, "w") as f: - f.write("$constrain\n") - f.write(f" atoms: {a_atom}, {h_atom}, {b_atom}\n") - f.write(" force constant: 0.5\n") - f.write(" reference=coords.ref\n") - f.write(f" distance: {a_atom}, {h_atom}, auto\n") - f.write(f" distance: {h_atom}, {b_atom}, auto\n") - f.write("$metadyn\n") - if list_of_atoms_numbers_not_participating_in_reaction: - f.write( - f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n' - ) - f.write("$end\n") - - # --- build CREST command string --- - # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8 - cpus = int(SERVERS["local"].get("cpus", 8)) - if CREST_ENV_PATH: - crest_exe = "crest" - else: - crest_exe = CREST_PATH if CREST_PATH is not None else "crest" - - commands = [ - crest_exe, - "coords.ref", - "--cinp constraints.inp", - "--noreftopo", - f'-T {SERVERS["local"].get("cpus", 8)}', - ] - command = " ".join(commands) - - # --- activation line (optional) --- - activation_line = CREST_ENV_PATH or "" - - if SERVERS.get("local") is not None: - cluster_soft = SERVERS["local"]["cluster_soft"].lower() - - if cluster_soft in ["condor", "htcondor"]: - # HTCondor branch (kept for completeness – you can delete if you don't use it) - sub_job = submit_scripts["local"]["crest"] - format_params = { - "name": f"crest_{xyz_crest_int}", - "cpus": cpus, - "memory": int(SERVERS["local"].get("memory", 32.0) * 1024), - } - sub_job = sub_job.format(**format_params) - - with open( - os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w" - ) as f: - f.write(sub_job) - - crest_job = submit_scripts["local"]["crest_job"] - crest_job = crest_job.format( - path=path, - activation_line=activation_line, - commands=command, - ) - - with open(os.path.join(path, "job.sh"), "w") as f: - f.write(crest_job) - os.chmod(os.path.join(path, "job.sh"), 0o777) - - # Pre-create out/err for any status checkers that expect them - for fname in ("out.txt", "err.txt"): - fpath = os.path.join(path, fname) - if not os.path.exists(fpath): - with open(fpath, "w") as f: - f.write("") - os.chmod(fpath, 0o777) - - elif cluster_soft == "pbs": - # PBS branch that matches your 'crest' template above - sub_job = submit_scripts["local"]["crest"] - format_params = { - "queue": SERVERS["local"].get("queue", "alon_q"), - "name": f"crest_{xyz_crest_int}", - "cpus": cpus, - # 'memory' is in GB for the template: 
mem={memory}gb - "memory": int( - SERVERS["local"].get("memory", 32) - if SERVERS["local"].get("memory", 32) < 60 - else 40 - ), - "activation_line": activation_line, - "commands": command, - } - sub_job = sub_job.format(**format_params) - - submit_filename = settings["submit_filenames"]["PBS"] # usually 'submit.sh' - submit_path = os.path.join(path, submit_filename) - with open(submit_path, "w") as f: - f.write(sub_job) - os.chmod(submit_path, 0o755) - - else: - raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}") - - return path - - -def submit_crest_jobs(crest_paths: List[str]) -> None: - """ - Submit CREST jobs to the server. - - Args: - crest_paths (List[str]): List of paths to the CREST directories. - - Returns: - dict: A dictionary containing job IDs as keys and their statuses as values. - """ - crest_jobs = {} - for crest_path in crest_paths: - job_status, job_id = submit_job(path=crest_path) - logger.info(f"CREST job {job_id} submitted for {crest_path}") - crest_jobs[job_id] = {"path": crest_path, "status": job_status} - return crest_jobs - - -def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None: - """ - Monitor CREST jobs until they are complete. - - Args: - crest_jobs (dict): Dictionary containing job information (job ID, path, and status). - check_interval (int): Time interval (in seconds) to wait between status checks. - """ - while True: - all_done = True - for job_id, job_info in crest_jobs.items(): - if job_info["status"] not in ["done", "failed"]: - try: - job_info["status"] = check_job_status(job_id) # Update job status - except Exception as e: - logger.error(f"Error checking job status for job {job_id}: {e}") - job_info["status"] = "failed" - if job_info["status"] not in ["done", "failed"]: - all_done = False - if all_done: - break - time.sleep(min(check_interval, 100)) - - -def process_completed_jobs(crest_jobs: dict) -> list: - """ - Process the completed CREST jobs and update XYZ guesses. - - Args: - crest_jobs (dict): Dictionary containing job information. - xyz_guesses (list): List to store the resulting XYZ guesses. 
- """ - xyz_guesses = [] - for job_id, job_info in crest_jobs.items(): - crest_path = job_info["path"] - if job_info["status"] == "done": - crest_best_path = os.path.join(crest_path, "crest_best.xyz") - if os.path.exists(crest_best_path): - with open(crest_best_path, "r") as f: - content = f.read() - xyz_guess = str_to_xyz(content) - xyz_guesses.append(xyz_guess) - else: - logger.error(f"crest_best.xyz not found in {crest_path}") - elif job_info["status"] == "failed": - logger.error(f"CREST job failed for {crest_path}") + all_zmats.append(zmat_guess) + xyz_guesses.append({"xyz": xyz_guess_crest, "method": "CREST"}) return xyz_guesses -def extract_digits(s: str) -> int: - """ - Extract the first integer from a string - - Args: - s (str): The string to extract the integer from - - Returns: - int: The first integer in the string - - """ - return int(re.sub(r"[^\d]", "", s)) - - -def convert_xyz_to_df(xyz: dict) -> pd.DataFrame: - """ - Convert a dictionary of xyz coords to a pandas DataFrame with bond distances - - Args: - xyz (dict): The xyz coordinates of the molecule - - Return: - pd.DataFrame: The xyz coordinates as a pandas DataFrame - - """ - symbols = xyz["symbols"] - symbol_enum = [f"{symbol}{i}" for i, symbol in enumerate(symbols)] - ts_dmat = xyz_to_dmat(xyz) - - return pd.DataFrame(ts_dmat, columns=symbol_enum, index=symbol_enum) - - -def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: - """ - Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it - - Args: - dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols - - Returns: - dict: The hydrogen atom and the two heavy atoms. The keys are 'H', 'A', 'B' - """ - - closest_atoms = {} - for index, row in dataframe.iterrows(): - - row[index] = np.inf - closest = row.nsmallest(2).index.tolist() - closest_atoms[index] = closest - - hydrogen_keys = [key for key in dataframe.index if key.startswith("H")] - condition_occurrences = [] - - for hydrogen_key in hydrogen_keys: - atom_neighbours = closest_atoms[hydrogen_key] - is_heavy_present = any( - atom for atom in closest_atoms if not atom.startswith("H") - ) - if_hydrogen_present = any( - atom - for atom in closest_atoms - if atom.startswith("H") and atom != hydrogen_key - ) - - if is_heavy_present and if_hydrogen_present: - # Store the details of this occurrence - condition_occurrences.append( - {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]} - ) - - # Check if the condition was met - if condition_occurrences: - if len(condition_occurrences) > 1: - # Store distances to decide which occurrence to use - occurrence_distances = [] - for occurrence in condition_occurrences: - # Calculate the sum of distances to the two heavy atoms - hydrogen_key = f"{occurrence['H']}" - heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"] - try: - distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() - occurrence_distances.append((occurrence, distances)) - except KeyError as e: - print(f"Error accessing distances for occurrence {occurrence}: {e}") - - # Select the occurrence with the smallest distance - best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] - return { - "H": extract_digits(best_occurrence["H"]), - "A": extract_digits(best_occurrence["A"]), - "B": extract_digits(best_occurrence["B"]), - } - else: - - # Check the all the hydrogen atoms, and see the closest two heavy atoms and aggregate their distances to determine which Hyodrogen atom has the lowest distance aggregate - 
min_distance = np.inf - selected_hydrogen = None - selected_heavy_atoms = None - - for hydrogen_key in hydrogen_keys: - atom_neighbours = closest_atoms[hydrogen_key] - heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")] - - if len(heavy_atoms) < 2: - continue - - distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum() - if distances < min_distance: - min_distance = distances - selected_hydrogen = hydrogen_key - selected_heavy_atoms = heavy_atoms - - if selected_hydrogen: - return { - "H": extract_digits(selected_hydrogen), - "A": extract_digits(selected_heavy_atoms[0]), - "B": extract_digits(selected_heavy_atoms[1]), - } - else: - raise ValueError("No valid hydrogen atom found.") - - -register_job_adapter("heuristics", HeuristicsAdapter) \ No newline at end of file +register_job_adapter("heuristics", HeuristicsAdapter) diff --git a/arc/species/converter.py b/arc/species/converter.py index 8489d8b317..ef07cbd9f3 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -5,6 +5,7 @@ import math import numpy as np import os +import warnings from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from ase import Atoms @@ -44,14 +45,14 @@ ob.obErrorLog.SetOutputLevel(0) logger = get_logger() -def str_to_str(xyz_str: str, - reverse_atoms: bool = False, - units: str = 'angstrom', - convert_to: str = 'angstrom', - project_directory: Optional[str] = None - ) -> str: +def reorder_xyz_string(xyz_str: str, + reverse_atoms: bool = False, + units: str = 'angstrom', + convert_to: str = 'angstrom', + project_directory: Optional[str] = None + ) -> str: """ - Convert a string xyz format from `ATOM X Y Z` to `X Y Z ATOM`. Also, can convert units from `Angstrom` to `Bohr` and vice versa. + Reorder an XYZ string between ``ATOM X Y Z`` and ``X Y Z ATOM`` with optional unit conversion. Args: xyz_str (str): The string xyz format to be converted. @@ -129,6 +130,14 @@ def str_to_str(xyz_str: str, return '\n'.join(processed_lines) +def str_to_str(*args, **kwargs) -> str: + """ + Backwards compatible wrapper for reorder_xyz_string. 
+ """ + warnings.warn("str_to_str was renamed to reorder_xyz_string", DeprecationWarning) + return reorder_xyz_string(*args, **kwargs) + + def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None, ) -> dict: diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index 8203d2f56c..9f2b7d25b9 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -686,6 +686,25 @@ def test_str_to_xyz(self): xyz = converter.str_to_xyz(xyz_format) self.assertEqual(xyz, expected_xyz) + def test_reorder_xyz_string_atom_first(self): + """Test reordering atom-first XYZ strings with unit conversion""" + xyz_format = "C 0.0 1.0 2.0\nH -1.0 0.5 0.0" + converted = converter.reorder_xyz_string(xyz_str=xyz_format, reverse_atoms=True, convert_to="bohr") + expected = "0.0 1.8897259886 3.7794519772 C\n-1.8897259886 0.9448629943 0.0 H" + self.assertEqual(converted, expected) + + def test_reorder_xyz_string_coordinate_first(self): + """Test reordering coordinate-first XYZ strings back to atom-last order with conversion""" + xyz_format = "0.0 0.0 0.0 N\n1.0 0.0 0.0 H" + converted = converter.reorder_xyz_string( + xyz_str=xyz_format, + reverse_atoms=False, + units="bohr", + convert_to="angstrom", + ) + expected = "0.0 0.0 0.0 N\n0.529177 0.0 0.0 H" + self.assertEqual(converted, expected) + def test_xyz_to_str(self): """Test converting an ARC xyz format to a string xyz format""" xyz_str1 = converter.xyz_to_str(xyz_dict=self.xyz1['dict']) From 8c115258448bdc9fdf3b91ccaacc09266eb85af6 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 00:19:24 +0200 Subject: [PATCH 3/9] Added AGENTS.md to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 62485f1faa..d3df50e3b3 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,4 @@ build/* *.log *.xml +AGENTS.md From 8f2d1478ad63878fd1136ecc4c3406fdbe140a7d Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 00:46:30 +0200 Subject: [PATCH 4/9] ARC Installation fixes --- Makefile | 6 ++- devtools/crest_environment.yml | 6 +++ devtools/install_all.sh | 32 +++++++++++----- devtools/install_autotst.sh | 69 ++++++++++++++++++++++++++++++---- devtools/install_crest.sh | 63 +++++++++++++++++++++++++++++++ devtools/install_gcn.sh | 67 ++++++++++++++++----------------- devtools/install_pyrdl.sh | 4 +- devtools/install_rmg.sh | 25 +++++++++--- devtools/install_torchani.sh | 5 ++- 9 files changed, 214 insertions(+), 63 deletions(-) create mode 100644 devtools/crest_environment.yml create mode 100644 devtools/install_crest.sh diff --git a/Makefile b/Makefile index 9e40dd07e7..84f4331103 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DEVTOOLS_DIR := devtools .PHONY: all help clean test test-unittests test-functional test-all \ install-all install-ci install-pyrdl install-rmgdb install-autotst install-gcn \ - install-gcn-cpu install-kinbot install-sella install-xtb install-torchani install-ob \ + install-gcn-cpu install-kinbot install-sella install-xtb install-crest install-torchani install-ob \ lite check-env compile @@ -35,6 +35,7 @@ help: @echo " install-kinbot Install KinBot" @echo " install-sella Install Sella" @echo " install-xtb Install xTB" + @echo " install-crest Install CREST" @echo " install-torchani Install TorchANI" @echo " install-ob Install OpenBabel" @echo "" @@ -96,6 +97,9 @@ install-sella: install-xtb: bash $(DEVTOOLS_DIR)/install_xtb.sh +install-crest: + bash $(DEVTOOLS_DIR)/install_crest.sh + install-torchani: bash 
$(DEVTOOLS_DIR)/install_torchani.sh

diff --git a/devtools/crest_environment.yml b/devtools/crest_environment.yml
new file mode 100644
index 0000000000..2291e72d37
--- /dev/null
+++ b/devtools/crest_environment.yml
@@ -0,0 +1,6 @@
+name: crest_env
+channels:
+  - conda-forge
+dependencies:
+  - python>=3.7
+  - crest=2.12
diff --git a/devtools/install_all.sh b/devtools/install_all.sh
index c958fdd548..e4fe750ea2 100644
--- a/devtools/install_all.sh
+++ b/devtools/install_all.sh
@@ -26,6 +26,8 @@ run_devtool () { bash "$DEVTOOLS_DIR/$1" "${@:2}"; }
 SKIP_CLEAN=false
 SKIP_EXT=false
 SKIP_ARC=false
+SKIP_RMG=false
+ARC_INSTALLED=false
 RMG_ARGS=()
 ARC_ARGS=()
 EXT_ARGS=()
@@ -36,6 +38,7 @@ while [[ $# -gt 0 ]]; do
     --no-clean) SKIP_CLEAN=true ;;
     --no-ext) SKIP_EXT=true ;;
     --no-arc) SKIP_ARC=true ;;
+    --no-rmg) SKIP_RMG=true ;;
     --rmg-*) RMG_ARGS+=("--${1#--rmg-}") ;;
     --arc-*) ARC_ARGS+=("--${1#--arc-}") ;;
     --ext-*) EXT_ARGS+=("--${1#--ext-}") ;;
@@ -44,6 +47,7 @@
 Usage: $0 [global-flags] [--rmg-xxx] [--arc-yyy] [--ext-zzz]
   --no-clean  Skip micromamba/conda cache cleanup
   --no-ext    Skip external tools (AutoTST, KinBot, …)
+  --no-rmg    Skip RMG-Py entirely
   --rmg-path  Forward '--path' to RMG installer
   --rmg-pip   Forward '--pip' to RMG installer
 ...
@@ -67,16 +71,15 @@ echo "   EXT sub-flags : ${EXT_ARGS[*]:-(none)}"
 echo ">>> Beginning full ARC external repo installation…"
 pushd . >/dev/null

-# 1) RMG
-echo "=== Installing RMG ==="
-run_devtool install_rmg.sh "${RMG_ARGS[@]}"
-
-
-  # 2) PyRDL
-  echo "=== Installing PyRDL ==="
-  bash devtools/install_pyrdl.sh
+# 1) RMG (optional)
+if [[ $SKIP_RMG == false ]]; then
+  echo "=== Installing RMG ==="
+  run_devtool install_rmg.sh "${RMG_ARGS[@]}"
+else
+  echo "ℹ️ --no-rmg flag set. Skipping RMG installation."
+fi

-# 3) ARC itself (skip env creation in CI or if user requests it)
+# 2) ARC itself (skip env creation in CI or if user requests it)
 if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then
   if [[ $SKIP_CLEAN == false ]]; then
     echo "=== Cleaning up old ARC build artifacts ==="
@@ -88,10 +91,20 @@ if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then

   echo "=== Installing ARC ==="
   run_devtool install_arc.sh "${ARC_ARGS[@]}"
+  ARC_INSTALLED=true
 else
+  ARC_INSTALLED=false
   echo ":information_source: CI detected or --no-arc flag set. Skip cleaning ARC installation."
 fi

+# 3) PyRDL (depends on ARC)
+if [[ $ARC_INSTALLED == true ]]; then
+  echo "=== Installing PyRDL ==="
+  bash devtools/install_pyrdl.sh
+else
+  echo "ℹ️ Skipping PyRDL install because ARC installation was skipped."
+fi
+
 if [[ $SKIP_EXT == false ]]; then
   # map of friendly names → installer scripts
   declare -A EXT_INSTALLERS=(
@@ -100,6 +113,7 @@ if [[ $SKIP_EXT == false ]]; then
     [KinBot]=install_kinbot.sh
     [OpenBabel]=install_ob.sh
     [xtb]=install_xtb.sh
+    [CREST]=install_crest.sh
     [Sella]=install_sella.sh
     [TorchANI]=install_torchani.sh
   )
diff --git a/devtools/install_autotst.sh b/devtools/install_autotst.sh
index 5e3bc35288..0dfc34c63c 100644
--- a/devtools/install_autotst.sh
+++ b/devtools/install_autotst.sh
@@ -51,15 +51,36 @@ write_hook () {
   cat >"$act" <<EOF
 # AutoTST hook – $(date +%F)
 export AUTOTST_ROOT="$repo"
+export AUTOTST_OLD_PATH="\$PATH"
+export AUTOTST_OLD_PYTHONPATH="\${PYTHONPATH:-}"
+_strip_path () { local needle=":\$1:"; local haystack=":\$2:"; echo "\${haystack//\$needle/:}" | sed 's/^://;s/:\$//'; }
 EOF
+
+  if [[ -n "${RMG_PY_PATH:-}" ]]; then
+    cat >>"$act" <<'EOF'
+# Remove RMG-Py from PATH/PYTHONPATH to avoid clashes while AutoTST is active.
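+# _strip_path drops a single ":"-delimited entry from a search path, e.g.
+#   _strip_path "/opt/RMG-Py" "/usr/bin:/opt/RMG-Py:/bin"  ->  "/usr/bin:/bin"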
+if [[ -n "${RMG_PY_PATH:-}" ]]; then + export PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" + export PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")" +fi +EOF + fi + + cat >>"$act" <<'EOF' case ":\$PYTHONPATH:" in *":\$AUTOTST_ROOT:"*) ;; \ *) export PYTHONPATH="\$AUTOTST_ROOT:\${PYTHONPATH:-}" ;; esac EOF # --- de-activation ----------------------------------------------------- cat >"$deact" <<'EOF' -_strip () { local n=":$1:"; local s=":$2:"; echo "${s//$n/:}" | sed 's/^://;s/:$//'; } -export PYTHONPATH=$(_strip "$AUTOTST_ROOT" ":${PYTHONPATH:-}:") -unset AUTOTST_ROOT +export PATH="${AUTOTST_OLD_PATH:-$PATH}" +if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then + export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH" +else + unset PYTHONPATH +fi +unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH EOF echo "🔗 AutoTST hook refreshed in $env" } @@ -115,12 +136,44 @@ fi if [[ $MODE == "path" ]]; then - AUTO_PATH_LINE="export PYTHONPATH=\"\$PYTHONPATH:$(pwd)\"" - if ! grep -Fqx "$AUTO_PATH_LINE" ~/.bashrc; then - echo "$AUTO_PATH_LINE" >> ~/.bashrc - echo "✔️ Added AutoTST path to ~/.bashrc" + HOOK_SENTINEL="# AutoTST path-mode hook" + if ! grep -Fqx "$HOOK_SENTINEL" ~/.bashrc; then + cat <<'EOF' >> ~/.bashrc +# AutoTST path-mode hook +autotst_on () { + export AUTOTST_ROOT="__AUTOTST_PATH__" + export AUTOTST_OLD_PATH="$PATH" + export AUTOTST_OLD_PYTHONPATH="${PYTHONPATH:-}" + + _strip_path () { local needle=":$1:"; local haystack=":$2:"; echo "${haystack//$needle/:}" | sed 's/^://;s/:$//'; } + if [[ -n "${RMG_PY_PATH:-}" ]]; then + PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" + PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")" + fi + + case ":$PYTHONPATH:" in *":$AUTOTST_ROOT:"*) ;; \ + *) PYTHONPATH="$AUTOTST_ROOT:${PYTHONPATH:-}" ;; esac + export PATH PYTHONPATH +} + +autotst_off () { + export PATH="${AUTOTST_OLD_PATH:-$PATH}" + if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then + export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH" + else + unset PYTHONPATH + fi + unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH +} + +# Enable AutoTST by default in new shells and keep RMG-Py out of the way. +autotst_on +EOF + # replace placeholder with actual path + sed -i "s#__AUTOTST_PATH__#$(pwd | sed 's#/#\\\\/#g')#" ~/.bashrc + echo "✔️ Added AutoTST path-mode hook to ~/.bashrc" else - echo "ℹ️ AutoTST path already exists in ~/.bashrc" + echo "ℹ️ AutoTST path-mode hook already exists in ~/.bashrc" fi elif [[ $MODE == "conda" ]]; then write_hook tst_env "$(pwd)" diff --git a/devtools/install_crest.sh b/devtools/install_crest.sh new file mode 100644 index 0000000000..f6df81e2a5 --- /dev/null +++ b/devtools/install_crest.sh @@ -0,0 +1,63 @@ +#!/bin/bash -l +set -eo pipefail + +if command -v micromamba &> /dev/null; then + echo "✔️ Micromamba is installed." + COMMAND_PKG=micromamba +elif command -v mamba &> /dev/null; then + echo "✔️ Mamba is installed." + COMMAND_PKG=mamba +elif command -v conda &> /dev/null; then + echo "✔️ Conda is installed." + COMMAND_PKG=conda +else + echo "❌ Micromamba, Mamba, or Conda is required. Please install one." + exit 1 +fi + +if [ "$COMMAND_PKG" = "micromamba" ]; then + eval "$(micromamba shell hook --shell=bash)" +else + BASE=$(conda info --base) + . "$BASE/etc/profile.d/conda.sh" +fi + +ENV_FILE="devtools/crest_environment.yml" + +if [ ! -f "$ENV_FILE" ]; then + echo "❌ File not found: $ENV_FILE" + exit 1 +fi + +if $COMMAND_PKG env list | grep -q '^crest_env\s'; then + echo ">>> Updating existing crest_env..." 
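+    # 'env update --prune' syncs crest_env with the YAML spec and removes packages
+    # that are no longer listed there, so re-running this installer stays idempotent.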
+    $COMMAND_PKG env update -n crest_env -f "$ENV_FILE" --prune
+else
+    echo ">>> Creating new crest_env..."
+    $COMMAND_PKG env create -n crest_env -f "$ENV_FILE" -y
+fi
+
+if [ "$COMMAND_PKG" = "micromamba" ]; then
+    micromamba activate crest_env
+else
+    conda activate crest_env
+fi
+
+echo ">>> Checking CREST installation..."
+
+if crest --version &> /dev/null; then
+    version_output=$(crest --version 2>&1 | head -n 1)
+    echo "$version_output"
+    if ! grep -q "2\\.12" <<< "$version_output"; then
+        echo "❌ CREST version mismatch (expected 2.12)."
+        exit 1
+    fi
+    echo "✔️ CREST 2.12 is successfully installed."
+else
+    echo "❌ CREST is not found in PATH. Please check the environment."
+    exit 1
+fi
+
+$COMMAND_PKG deactivate
+
+echo "✅ Done installing CREST (crest_env)."
diff --git a/devtools/install_gcn.sh b/devtools/install_gcn.sh
index 8f83a2cda1..5273353d77 100644
--- a/devtools/install_gcn.sh
+++ b/devtools/install_gcn.sh
@@ -93,12 +93,12 @@ write_hook() {   # env_name  repo_path
   rm -f "$act" "$deact"

   # --- activation hook -----------------------------------------------------
-  cat <<'ACTHOOK' >"$act"
+  cat <<ACTHOOK >"$act"
 # TS-GCN hook – $(date +%F)
 export TSGCN_ROOT="$repo"
-case ":$PYTHONPATH:" in
-  *":$TSGCN_ROOT:") ;; \
-  *) export PYTHONPATH="$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
+case ":\$PYTHONPATH:" in
+  *":\$TSGCN_ROOT:") ;; \
+  *) export PYTHONPATH="\$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
 esac
 ACTHOOK

@@ -182,46 +182,43 @@ CORE_PKGS=(

 # ── inline env creation & unified PyTorch install --------------------------
 if $COMMAND_PKG env list | awk '{print $1}' | grep -qx ts_gcn; then
-  $COMMAND_PKG env update -n ts_gcn \
+  $COMMAND_PKG install -n ts_gcn \
     -c schrodinger -c conda-forge \
     --channel-priority flexible \
     "${CORE_PKGS[@]}" \
-    --prune -y
+    --yes
 else
-  $COMMAND_PKG env create -n ts_gcn \
+  $COMMAND_PKG create -n ts_gcn \
     -c schrodinger -c conda-forge \
     --channel-priority flexible \
     "${CORE_PKGS[@]}" \
-    -y
+    --yes
 fi

-  # 2) activate it - we set +u to avoid printing variable names
-  #    that are not set yet
-  set +u; $COMMAND_PKG activate ts_gcn; set -u
-
-  # 3) pip‐install exactly the CPU or CUDA wheels (no ROCm on that index)
-  WHEEL=https://download.pytorch.org/whl/torch_stable.html
-  if [[ $CUDA_VERSION == cpu ]]; then
-pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
-  else
-    pip install torch==1.7.1+${CUDA_VERSION} \
-      torchvision==0.8.2+${CUDA_VERSION} \
-      torchaudio==0.7.2+${CUDA_VERSION} \
-      -f $WHEEL
-  fi
-  # for PyG wheels use the official PyG index—with a real '+' in the URL
-  TORCH_VER=1.7.1
-  WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
-
-  # install ONLY the prebuilt binaries, never fall back to source
-  pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
-  pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
-  pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
-  pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
-
-  # finally the meta‐package (this one can install from PyPI)
-  pip install torch-geometric
-  echo "✅ ts_gcn environment ready"
+# 2) pip‐install exactly the CPU or CUDA wheels (no ROCm on that index)
+PIP_RUN=("$COMMAND_PKG" run -n ts_gcn)
+WHEEL=https://download.pytorch.org/whl/torch_stable.html
+if [[ $CUDA_VERSION == cpu ]]; then
+  "${PIP_RUN[@]}" pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
+else
+  "${PIP_RUN[@]}" pip install torch==1.7.1+${CUDA_VERSION} \
+ 
torchvision==0.8.2+${CUDA_VERSION} \
+                  torchaudio==0.7.2+${CUDA_VERSION} \
+                  -f $WHEEL
+fi
+# for PyG wheels use the official PyG index—with a real '+' in the URL
+TORCH_VER=1.7.1
+WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
+
+# install ONLY the prebuilt binaries, never fall back to source
+"${PIP_RUN[@]}" pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
+"${PIP_RUN[@]}" pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
+"${PIP_RUN[@]}" pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
+"${PIP_RUN[@]}" pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
+
+# finally the meta‐package (this one can install from PyPI)
+"${PIP_RUN[@]}" pip install torch-geometric
+echo "✅ ts_gcn environment ready"

 # ── write hooks into conda envs if required -------------------------------
 if [[ $MODE == conda ]]; then
diff --git a/devtools/install_pyrdl.sh b/devtools/install_pyrdl.sh
index 2b2cc9340c..87f1ccf454 100644
--- a/devtools/install_pyrdl.sh
+++ b/devtools/install_pyrdl.sh
@@ -49,8 +49,8 @@ fi

 # Ensure CMake is installed in the environment
 if ! command -v cmake &> /dev/null; then
-    echo "Installing CMake..."
-    "$COMMAND_PKG" install -y cmake
+    echo "Installing CMake into arc_env..."
+    "$COMMAND_PKG" install -n arc_env -c conda-forge -y cmake
 fi

 # Clone and build RingDecomposerLib
diff --git a/devtools/install_rmg.sh b/devtools/install_rmg.sh
index 036a6449ce..0a35192627 100644
--- a/devtools/install_rmg.sh
+++ b/devtools/install_rmg.sh
@@ -176,21 +176,34 @@ fi
 ACTIVE_RE="^[[:space:]]*[^#].*${RMG_PY_PATH//\//\\/}"   # uncommented, contains path
 COMMENT_RE="^[[:space:]]*#.*${RMG_PY_PATH//\//\\/}"     # commented-out, contains path

-NEW_LINE='export PATH="$PATH:'"$RMG_PY_PATH"'"'

-# If PATH_ADD is true, add RMG-Py to PATH
+# If PATH_ADD is true, add RMG-Py to PATH/PYTHONPATH via bashrc
 if [ "$MODE" == path ]; then
     if grep -Eq "$ACTIVE_RE" "$RC"; then
         printf 'ℹ️ RMG-Py already active in %s\n' "$RC"
     elif grep -Eq "$COMMENT_RE" "$RC"; then
-        printf '✅ Found commented entry; adding new active line\n'
-        printf '\n# RMG-Py added on %s\n%s\n' "$(date +%F)" "$NEW_LINE" >> "$RC"
+        printf '✅ Found commented entry; adding new active block\n'
+        cat <<EOF >> "$RC"
+
+# RMG-Py added on $(date +%F)
+export RMG_PY_PATH="$RMG_PY_PATH"
+case ":\$PATH:" in *":\$RMG_PY_PATH:"*) ;; *) export PATH="\$PATH:\$RMG_PY_PATH" ;; esac
+case "\${PYTHONPATH:+:\$PYTHONPATH:}" in *":\$RMG_PY_PATH:"*) ;; \
+  *) export PYTHONPATH="\${PYTHONPATH:+\$PYTHONPATH:}\$RMG_PY_PATH" ;; esac
+EOF
     else
-        printf '✅ No entry found; adding new active line\n'
-        printf '\n# RMG-Py added on %s\n%s\n' "$(date +%F)" "$NEW_LINE" >> "$RC"
+        printf '✅ No entry found; adding new active block\n'
+        cat <<EOF >> "$RC"
+
+# RMG-Py added on $(date +%F)
+export RMG_PY_PATH="$RMG_PY_PATH"
+case ":\$PATH:" in *":\$RMG_PY_PATH:"*) ;; *) export PATH="\$PATH:\$RMG_PY_PATH" ;; esac
+case "\${PYTHONPATH:+:\$PYTHONPATH:}" in *":\$RMG_PY_PATH:"*) ;; \
+  *) export PYTHONPATH="\${PYTHONPATH:+\$PYTHONPATH:}\$RMG_PY_PATH" ;; esac
+EOF
     fi
 elif [ "$MODE" == conda ]; then
     # conda envs already have the RMG_PY_PATH in PATH, so no need to add it
diff --git a/devtools/install_torchani.sh b/devtools/install_torchani.sh
index 2ceca9ac5c..1536609abf 100644
--- a/devtools/install_torchani.sh
+++ b/devtools/install_torchani.sh
@@ -2,9 +2,10 @@
 set -euo pipefail

 # Enable tracing of each command, but tee it to a logfile
+LOGFILE="tani_env_setup.log"
 exec 3>&1 4>&2
 trap 'exec 2>&4 
1>&3' EXIT -exec 1> >(tee .log) 2>&1 +exec 1> >(tee "$LOGFILE") 2>&1 set -x echo ">>> Starting TANI environment setup at $(date)" @@ -53,7 +54,7 @@ fi echo ">>> Creating conda env from $ENV_YAML (name=$ENV_NAME)" if ! $COMMAND_PKG env create -n "$ENV_NAME" -f "$ENV_YAML" -v; then echo "❌ Environment creation failed. Dumping last 200 lines of log:" - tail -n 200 tani_env_setup.log + tail -n 200 "$LOGFILE" echo "---- Disk usage at failure ----" df -h . exit 1 From d2f5e031715ba686600b351eae533e91e4102a7b Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 14:56:06 +0200 Subject: [PATCH 5/9] Added CREST test and also fixed CodeQL errors --- arc/job/adapters/ts/autotst_ts.py | 1 - arc/job/adapters/ts/crest.py | 8 ++- arc/job/adapters/ts/crest_test.py | 100 ++++++++++++++++++++++++++++++ arc/job/adapters/ts/heuristics.py | 3 - arc/settings/settings.py | 12 ---- 5 files changed, 105 insertions(+), 19 deletions(-) create mode 100644 arc/job/adapters/ts/crest_test.py diff --git a/arc/job/adapters/ts/autotst_ts.py b/arc/job/adapters/ts/autotst_ts.py index d2058ecddc..d199c8e7d2 100644 --- a/arc/job/adapters/ts/autotst_ts.py +++ b/arc/job/adapters/ts/autotst_ts.py @@ -27,7 +27,6 @@ if TYPE_CHECKING: from arc.level import Level - from autotst.reaction import Reaction as AutoTST_Reaction # noqa: F401 AUTOTST_PYTHON = settings['AUTOTST_PYTHON'] diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py index 7e8a5a0fc3..8693c0918f 100644 --- a/arc/job/adapters/ts/crest.py +++ b/arc/job/adapters/ts/crest.py @@ -140,7 +140,7 @@ def crest_ts_conformer_search( with open(os.path.join(path, "job.sh"), "w") as f: f.write(crest_job) - os.chmod(os.path.join(path, "job.sh"), 0o777) + os.chmod(os.path.join(path, "job.sh"), 0o700) # Pre-create out/err for any status checkers that expect them for fname in ("out.txt", "err.txt"): @@ -148,7 +148,7 @@ def crest_ts_conformer_search( if not os.path.exists(fpath): with open(fpath, "w") as f: f.write("") - os.chmod(fpath, 0o777) + os.chmod(fpath, 0o600) elif cluster_soft == "pbs": # PBS branch that matches your 'crest' template above @@ -172,7 +172,7 @@ def crest_ts_conformer_search( submit_path = os.path.join(path, submit_filename) with open(submit_path, "w") as f: f.write(sub_job) - os.chmod(submit_path, 0o755) + os.chmod(submit_path, 0o750) else: raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}") @@ -367,3 +367,5 @@ def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: } else: raise ValueError("No valid hydrogen atom found.") + + return {} diff --git a/arc/job/adapters/ts/crest_test.py b/arc/job/adapters/ts/crest_test.py new file mode 100644 index 0000000000..1d5320ad5e --- /dev/null +++ b/arc/job/adapters/ts/crest_test.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +""" +Unit tests for arc.job.adapters.ts.crest +""" + +import os +import tempfile +import unittest + +from arc.species.converter import str_to_xyz + + +class TestCrestAdapter(unittest.TestCase): + """ + Tests for CREST input generation. + """ + + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.tmpdir.cleanup() + + def test_creates_valid_input_files(self): + """ + Ensure CREST inputs are written with expected content/format. 
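+
+        The crest module's settings, submit scripts, and server configuration are
+        monkeypatched with minimal stubs (and restored in the ``finally`` block),
+        so the test needs neither a real CREST binary nor a PBS queue.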
+ """ + from arc.job.adapters.ts import crest as crest_mod + + xyz = str_to_xyz( + """O 0.0 0.0 0.0 + H 0.0 0.0 0.96 + H 0.9 0.0 0.0""" + ) + + backups = { + "settings": crest_mod.settings, + "submit_scripts": crest_mod.submit_scripts, + "CREST_PATH": crest_mod.CREST_PATH, + "CREST_ENV_PATH": crest_mod.CREST_ENV_PATH, + "SERVERS": crest_mod.SERVERS, + } + + try: + crest_mod.settings = {"submit_filenames": {"PBS": "submit.sh"}} + crest_mod.submit_scripts = { + "local": { + "crest": ( + "#PBS -q {queue}\n" + "#PBS -N {name}\n" + "#PBS -l select=1:ncpus={cpus}:mem={memory}gb\n" + ), + "crest_job": "{activation_line}\ncd {path}\n{commands}\n", + } + } + crest_mod.CREST_PATH = "/usr/bin/crest" + crest_mod.CREST_ENV_PATH = "" + crest_mod.SERVERS = { + "local": {"cluster_soft": "pbs", "cpus": 4, "memory": 8, "queue": "testq"} + } + + crest_dir = crest_mod.crest_ts_conformer_search( + xyz_guess=xyz, a_atom=0, h_atom=1, b_atom=2, path=self.tmpdir.name, xyz_crest_int=0 + ) + + coords_path = os.path.join(crest_dir, "coords.ref") + constraints_path = os.path.join(crest_dir, "constraints.inp") + submit_path = os.path.join(crest_dir, "submit.sh") + + self.assertTrue(os.path.exists(coords_path)) + self.assertTrue(os.path.exists(constraints_path)) + self.assertTrue(os.path.exists(submit_path)) + + with open(coords_path) as f: + coords = f.read().strip().splitlines() + self.assertEqual(coords[0].strip(), "$coord") + self.assertEqual(coords[-1].strip(), "$end") + self.assertEqual(len(coords) - 2, len(xyz["symbols"])) + + with open(constraints_path) as f: + constraints = f.read() + self.assertIn("atoms: 1, 2, 3", constraints) + self.assertIn("force constant: 0.5", constraints) + self.assertIn("reference=coords.ref", constraints) + self.assertIn("distance: 1, 2, auto", constraints) + self.assertIn("distance: 2, 3, auto", constraints) + self.assertIn("$metadyn", constraints) + self.assertTrue(constraints.strip().endswith("$end")) + finally: + crest_mod.settings = backups["settings"] + crest_mod.submit_scripts = backups["submit_scripts"] + crest_mod.CREST_PATH = backups["CREST_PATH"] + crest_mod.CREST_ENV_PATH = backups["CREST_ENV_PATH"] + crest_mod.SERVERS = backups["SERVERS"] + + +if __name__ == "__main__": + unittest.main() diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index a97f0f0c36..1c15121755 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -37,8 +37,6 @@ compare_zmats, relocate_zmat_dummy_atoms_to_the_end, str_to_xyz, - xyz_to_dmat, - xyz_to_str, zmat_from_xyz, zmat_to_xyz, ) @@ -303,7 +301,6 @@ def execute_incore(self): if len(self.reactions) < 5: successes = [tsg for tsg in rxn.ts_species.ts_guesses if tsg.success] - heuristics_successes = len([tsg for tsg in successes if 'heuristics' in tsg.method.lower()]) crest_successes = len([tsg for tsg in successes if 'crest' in tsg.method.lower()]) if successes: logger.info(f'Heuristics successfully found {len(successes)} TS guesses for {rxn.label}.') diff --git a/arc/settings/settings.py b/arc/settings/settings.py index 5315fa7598..50f1418504 100644 --- a/arc/settings/settings.py +++ b/arc/settings/settings.py @@ -385,19 +385,7 @@ def find_executable(env_name, executable_name='python'): for crest_path in [crest_path1, crest_path2, crest_path3, crest_path4, crest_path5, crest_path6, crest_path7]: if os.path.isfile(crest_path): - CREST_PATH = crest_path # check if using micromamba, mambaforge, anaconda3, miniconda3, or .conda - if 'micromamba' in crest_path: - # CREST_ENV_PATH = 
"source ~/micromamba/etc/profile.d/micromamba.sh && micromamba activate crest_env" - CREST_ENV_PATH = "source ~/.bashrc && micromamba activate crest_env" - elif 'mambaforge' in crest_path: - CREST_ENV_PATH = "source ~/.bashrc && mamba activate crest_env" - elif 'anaconda3' in crest_path: - CREST_ENV_PATH = "source ~/.bashrc && conda activate crest_env" - elif 'miniconda3' in crest_path: - CREST_ENV_PATH = "source ~/.bashrc && conda activate crest_env" - elif '.conda' in crest_path: - CREST_ENV_PATH = "source ~/.bashrc && conda activate crest_env" break # If the path (environment) does not exist, then we use the binary From f421c04e3acca932399f6f4eba1ea642f1828bf5 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 15:02:55 +0200 Subject: [PATCH 6/9] Minor fixes --- arc/job/adapters/ts/autotst_ts.py | 4 ---- arc/job/adapters/ts/crest.py | 2 +- arc/job/adapters/ts/heuristics.py | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arc/job/adapters/ts/autotst_ts.py b/arc/job/adapters/ts/autotst_ts.py index d199c8e7d2..2eeffb7743 100644 --- a/arc/job/adapters/ts/autotst_ts.py +++ b/arc/job/adapters/ts/autotst_ts.py @@ -20,10 +20,6 @@ from arc.species.species import ARCSpecies, TSGuess, colliding_atoms HAS_AUTOTST = True -# try: -# from autotst.reaction import Reaction as AutoTST_Reaction -# except (ImportError, ModuleNotFoundError): -# HAS_AUTOTST = False if TYPE_CHECKING: from arc.level import Level diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py index 8693c0918f..e3a797e908 100644 --- a/arc/job/adapters/ts/crest.py +++ b/arc/job/adapters/ts/crest.py @@ -172,7 +172,7 @@ def crest_ts_conformer_search( submit_path = os.path.join(path, submit_filename) with open(submit_path, "w") as f: f.write(sub_job) - os.chmod(submit_path, 0o750) + os.chmod(submit_path, 0o700) else: raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}") diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index 1c15121755..3aacaa3667 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -16,7 +16,6 @@ """ import datetime import itertools -import os from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from arc.common import almost_equal_coords, get_logger, is_angle_linear, is_xyz_linear, key_by_val From 152dcbb15b84024ccba0b4f3ab72bd46c49c542f Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 15:03:44 +0200 Subject: [PATCH 7/9] Stop tracking AGENTS.md --- AGENTS.md | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 65a9b8337e..0000000000 --- a/AGENTS.md +++ /dev/null @@ -1,38 +0,0 @@ -# Repository Guidelines - -## Project Structure & Module Organization -- Core package lives in `arc/` (chemistry logic, schedulers, plotting utilities). Tests for these modules sit alongside as `*_test.py`. -- Higher-level integration checks live in `functional/`. Examples and reproducible inputs are in `examples/` and `data/`. -- Developer scripts are under `devtools/`; docs sources live in `docs/`; the CLI entry point is `ARC.py`. -- Build helpers such as the `Makefile` and `utilities.py` are at the repo root; Docker assets are in `dockerfiles/` and `docker-compose.yml`. - -## Build, Test, and Development Commands -- `python -m pip install -e .` — editable install for local development. 
-- `make compile` — build the Cython extension (`arc.molecule`) in-place after dependency setup. -- `make test` — run unit tests with coverage over `arc/`. -- `make test-functional` — run functional/integration tests in `functional/`. -- `make test-all` — run both suites with coverage; default report is `coverage.xml`. -- `make clean` — remove build artifacts; `make check-env` prints Python path/version for debugging. - -## Coding Style & Naming Conventions -- Follow PEP 8: 4-space indentation, readable line wraps (~100 cols), and descriptive variable names (species, reactions, jobs). -- Prefer f-strings for logging and user messages; keep logging via the shared `logger` in `arc.common`. -- Tests use `pytest` discovery; name files `*_test.py` and functions `test_*` near the code they cover. -- Keep modules import-safe to avoid circular deps (e.g., `arc/common.py` avoids importing other ARC modules). - -## Testing Guidelines -- When running tests or any code, you must activate the conda environment called arc_env -- Primary framework: `pytest` (see `Makefile` targets above). Use `-ra -vv` locally when chasing failures. -- Add unit tests under `arc/` for module-level behavior and functional tests under `functional/` for end-to-end job flows. -- Aim to maintain existing coverage; `make test-all` produces `coverage.xml` for CI/codecov. -- Record any required external programs (quantum chemistry engines, schedulers) in test docstrings or skip markers. - -## Commit & Pull Request Guidelines -- Commit messages should be concise and action-oriented (e.g., `Fix species thermo parsing`, `Improve scheduler resubmission logging`). -- Squash noisy WIP commits before raising a PR; keep each commit logically scoped (feature, fix, or refactor). -- PRs should include: a brief summary, linked issues, test results (`make test`/`make test-functional` output), and notes on external requirements (e.g., queue systems, ORCA/Gaussian availability). -- Add screenshots or sample log excerpts when changing plotting, logging, or job orchestration behavior to aid reviewers. - -## Environment & Configuration Tips -- Use the provided `environment.yml` or `requirements.txt` to align dependencies; some features rely on external quantum chemistry backends configured via `arc/settings`. -- Before running remote jobs, verify scheduler and credentials in your local settings, and prefer `make check-env` to confirm Python tooling paths. From 55eb8ecdfbbf247612d3476070d9e647b9661053 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 15:09:40 +0200 Subject: [PATCH 8/9] Installation documentation update --- docs/source/installation.rst | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 0d32bd5b86..26f3cdac8b 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -51,12 +51,20 @@ Install dependencies conda activate arc_env -- Install the latest **DEVELOPER** version of RMG (which has Arkane). - It is recommended to follow RMG's `Developer installation by source using Anaconda +- RMG-Py/Arkane is required: ARC relies on it for thermochemistry, kinetics parsing, conformer handling, and file formats. + Without RMG-Py, many ARC workflows (e.g., thermo, Arkane post‑processing) will not run. + You can either install RMG-Py manually following the `Developer install guide `_ instructions. - Make sure to add RMG-Py to your PATH and PYTHONPATH variables as explained in RMG's documentation. 
-- Type ``make install-all`` under the ARC repository folder to install the following 3rd party repositories: + #for-developers-installation-by-source-using-anaconda-environment>`_ or let the ARC installers handle it automatically + (see below). Ensure RMG-Py is on ``PATH`` and ``PYTHONPATH`` (or use the conda hook option below) so ARC can import it. +- Run ``make install`` under the ARC repository folder to install ARC plus RMG-Py/RMG-database (default bashrc/``PATH`` wiring), + PyRDL, and external tools (CREST, AutoTST, KinBot, TS-GCN, xTB, Sella, TorchANI, OpenBabel). + Use ``make install-lite`` (no externals) or call ``bash devtools/install_all.sh --no-rmg`` if you want to skip RMG. + These installers pin known-good versions (e.g., CREST 2.12) and set up per-tool conda environments where appropriate. + Additional installer knobs: + + * ``bash devtools/install_all.sh --rmg-conda`` installs RMG-Py into a conda env and adds activation hooks (vs the default bashrc PATH/PYTHONPATH wiring). + * ``--no-ext`` skips external tools; ``--no-arc`` skips ARC env creation/compile (useful in CI); ``--no-clean`` retains conda/mamba caches. `AutoTST `_ (`West et al. `_), `KinBot `_ From 7b89b6e9a5f4be84dec43ccd2a1f6ec0b866532e Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Fri, 28 Nov 2025 17:58:08 +0200 Subject: [PATCH 9/9] Update fixes --- arc/job/adapters/ts/crest.py | 13 +++++-- arc/species/converter.py | 1 + .../calcs/Species/spc1/spc1/err.txt | 17 --------- .../calcs/Species/spc1/spc1/input.gjf | 12 ------ .../calcs/Species/spc1/spc1/submit.sub | 37 ------------------- devtools/install_autotst.sh | 8 +++- 6 files changed, 17 insertions(+), 71 deletions(-) delete mode 100644 arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt delete mode 100644 arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf delete mode 100644 arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py index e3a797e908..2d6ce2a46a 100644 --- a/arc/job/adapters/ts/crest.py +++ b/arc/job/adapters/ts/crest.py @@ -303,11 +303,11 @@ def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: for hydrogen_key in hydrogen_keys: atom_neighbours = closest_atoms[hydrogen_key] is_heavy_present = any( - atom for atom in closest_atoms if not atom.startswith("H") + atom for atom in atom_neighbours if not atom.startswith("H") ) if_hydrogen_present = any( atom - for atom in closest_atoms + for atom in atom_neighbours if atom.startswith("H") and atom != hydrogen_key ) @@ -330,7 +330,7 @@ def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() occurrence_distances.append((occurrence, distances)) except KeyError as e: - print(f"Error accessing distances for occurrence {occurrence}: {e}") + logger.error(f"Error accessing distances for occurrence {occurrence}: {e}") # Select the occurrence with the smallest distance best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] @@ -339,6 +339,13 @@ def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: "A": extract_digits(best_occurrence["A"]), "B": extract_digits(best_occurrence["B"]), } + else: + single_occurrence = condition_occurrences[0] + return { + "H": extract_digits(single_occurrence["H"]), + "A": extract_digits(single_occurrence["A"]), + "B": extract_digits(single_occurrence["B"]), + } else: # Check the all the hydrogen atoms, and see the closest two heavy atoms and aggregate their 
distances to determine which hydrogen atom has the lowest distance aggregate
diff --git a/arc/species/converter.py b/arc/species/converter.py
index ef07cbd9f3..b2d7a5126e 100644
--- a/arc/species/converter.py
+++ b/arc/species/converter.py
@@ -57,6 +57,7 @@ def reorder_xyz_string(xyz_str: str,
     Args:
         xyz_str (str): The string xyz format to be converted.
         reverse_atoms (bool, optional): Whether to reverse the atoms and coordinates.
+        units (str, optional): Units of the input coordinates ('angstrom' or 'bohr').
         convert_to (str, optional): The units to convert to (either 'angstrom' or 'bohr').
         project_directory (str, optional): The path to the project directory.

diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt
deleted file mode 100644
index 17a55b3536..0000000000
--- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-=>> PBS: job killed: walltime 86415 exceeded limit 86400
-Error: software termination
-   rax fffffffffffffffc, rbx 00007ffc0d4f90d0, rcx ffffffffffffffff
-   rdx 0000000000000000, rsp 00007ffc0d4f9098, rbp 0000000000000001
-   rsi 00007ffc0d4f90d0, rdi 0000000000038f1b, r8 00002b7af22a5700
-   r9 0000000000000000, r10 0000000000000000, r11 0000000000000246
-   r12 00007ffc0d4f90f0, r13 000000000000008f, r14 0000000000000000
-   r15 00007ffc0d4fff40
-Error: software termination
-   rax 0000000000024fa8, rbx 00002ae812e9f2c0, rcx 0000000000035498
-   rdx 00002ae8c4888bd0, rsp 00007ffde70fb680, rbp 00007ffde70fbf70
-   rsi 00002ae8c48be068, rdi 00002ae8c48f3508, r8 00002ae8c49289b0
-   r9 0000000000006a93, r10 0000000000006a95, r11 00002ae812ed4768
-   r12 00002ae812f66508, r13 00002ae812f9b9b0, r14 0000000000006a92
-   r15 00002ae81311f478
- --- traceback not available
- --- traceback not available
diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf
deleted file mode 100644
index 36f9d855ac..0000000000
--- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf
+++ /dev/null
@@ -1,12 +0,0 @@
-%chk=check.chk
-%mem=14336mb
-%NProcShared=8
-
-#P opt=(calcfc) cbs-qb3 IOp(2/9=2000)
-
-spc1
-
-0 3
-O       0.00000000    0.00000000    1.00000000
-
-
diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub
deleted file mode 100644
index 00b840cd67..0000000000
--- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash -l
-#SBATCH -p normal
-#SBATCH -J server1
-#SBATCH -N 1
-#SBATCH -n 8
-#SBATCH --time=120:00:00
-#SBATCH --mem-per-cpu=15770
-#SBATCH -o out.txt
-#SBATCH -e err.txt
-
-export g16root=/home/gridsan/groups/GRPAPI/Software
-export PATH=$g16root/g16/:$g16root/gv:$PATH
-which g16
-
-echo "============================================================"
-echo "Job ID : $SLURM_JOB_ID"
-echo "Job Name : $SLURM_JOB_NAME"
-echo "Starting on : $(date)"
-echo "Running on node : $SLURMD_NODENAME"
-echo "Current directory : $(pwd)"
-echo "============================================================"
-
-touch initial_time
-
-GAUSS_SCRDIR=/state/partition1/user//$SLURM_JOB_NAME-$SLURM_JOB_ID
-export $GAUSS_SCRDIR
-. 
$g16root/g16/bsd/g16.profile - -mkdir -p $GAUSS_SCRDIR - -g16 < input.gjf > input.log - -rm -rf $GAUSS_SCRDIR - -touch final_time - - \ No newline at end of file diff --git a/devtools/install_autotst.sh b/devtools/install_autotst.sh index 0dfc34c63c..7396326fde 100644 --- a/devtools/install_autotst.sh +++ b/devtools/install_autotst.sh @@ -140,12 +140,16 @@ if [[ $MODE == "path" ]]; then if ! grep -Fqx "$HOOK_SENTINEL" ~/.bashrc; then cat <<'EOF' >> ~/.bashrc # AutoTST path-mode hook +_strip_path () { + local needle=":$1:" + local haystack=":$2:" + echo "${haystack//$needle/:}" | sed 's/^://;s/:$//' +} + autotst_on () { export AUTOTST_ROOT="__AUTOTST_PATH__" export AUTOTST_OLD_PATH="$PATH" export AUTOTST_OLD_PYTHONPATH="${PYTHONPATH:-}" - - _strip_path () { local needle=":$1:"; local haystack=":$2:"; echo "${haystack//$needle/:}" | sed 's/^://;s/:$//'; } if [[ -n "${RMG_PY_PATH:-}" ]]; then PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")"