From 033d1fadd5234a87bc1d6133e18797c92ce79245 Mon Sep 17 00:00:00 2001 From: Jintao Date: Mon, 22 Jan 2024 15:33:43 +0800 Subject: [PATCH 01/11] main.py sets up conf_generation_level --- arc/main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arc/main.py b/arc/main.py index d7fb8131c9..22cdefc3be 100644 --- a/arc/main.py +++ b/arc/main.py @@ -81,6 +81,7 @@ class ARC(object): instead (e.g., ``opt_level``). composite_method (str, dict, Level, optional): Composite method. conformer_level (str, dict, Level, optional): Level of theory for conformer searches. + conf_generation_level (str, dict, Level, optional): Level of theory for conformer generation. opt_level (str, dict, Level, optional): Level of theory for geometry optimization. freq_level (str, dict, Level, optional): Level of theory for frequency calculations. sp_level (str, dict, Level, optional): Level of theory for single point calculations. @@ -167,6 +168,7 @@ class ARC(object): level_of_theory (str): A shortcut representing either sp//geometry levels or a composite method. composite_method (Level): Composite method. conformer_level (Level): Level of theory for conformer searches. + conf_generation_level (Level): Level of theory for conformer generation. opt_level (Level): Level of theory for geometry optimization. freq_level (Level): Level of theory for frequency calculations. sp_level (Level): Level of theory for single point calculations. @@ -242,6 +244,7 @@ def __init__(self, compute_thermo: bool = True, compute_transport: bool = False, conformer_level: Optional[Union[str, dict, Level]] = None, + conf_generation_level: Optional[Union[str, dict, Level]] = None, dont_gen_confs: List[str] = None, e_confs: float = 5.0, ess_settings: Dict[str, Union[str, List[str]]] = None, @@ -342,6 +345,7 @@ def __init__(self, self.level_of_theory = level_of_theory self.composite_method = composite_method or None self.conformer_level = conformer_level or None + self.conf_generation_level = conf_generation_level or None self.opt_level = opt_level or None self.freq_level = freq_level or None self.sp_level = sp_level or None @@ -485,6 +489,8 @@ def as_dict(self) -> dict: restart_dict['compute_transport'] = self.compute_transport if self.conformer_level is not None: restart_dict['conformer_level'] = self.conformer_level.as_dict() + if self.conf_generation_level is not None: + restart_dict['conf_generation_level'] = self.conf_generation_level.as_dict() if self.dont_gen_confs: restart_dict['dont_gen_confs'] = self.dont_gen_confs if self.ts_adapters: @@ -587,6 +593,7 @@ def execute(self) -> dict: rxn_list=self.reactions, composite_method=self.composite_method, conformer_level=self.conformer_level, + conf_generation_level=self.conf_generation_level, opt_level=self.opt_level, freq_level=self.freq_level, sp_level=self.sp_level, From 079cd4581f6eaa0ba4f841c669e0c992ad2b9bed Mon Sep 17 00:00:00 2001 From: Jintao Date: Sat, 27 Jan 2024 17:05:42 +0800 Subject: [PATCH 02/11] scheduler.py sets up conf_generation_level --- arc/scheduler.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arc/scheduler.py b/arc/scheduler.py index 1d1ec71868..164579d8c0 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -130,6 +130,7 @@ class Scheduler(object): project_directory (str): Folder path for the project: the input file path or ARC/Projects/project-name. composite_method (str, optional): A composite method to use. conformer_level (Union[str, dict], optional): The level of theory to use for conformer comparisons. + conf_generation_level (Union[str, dict], optional): The level of theory to use for conformer generation. opt_level (Union[str, dict], optional): The level of theory to use for geometry optimizations. freq_level (Union[str, dict], optional): The level of theory to use for frequency calculations. sp_level (Union[str, dict], optional): The level of theory to use for single point energy calculations. @@ -201,6 +202,7 @@ class Scheduler(object): Allowed values are He, Ne, Ar, Kr, H2, N2, O2. composite_method (str): A composite method to use. conformer_level (dict): The level of theory to use for conformer comparisons. + conf_generation_level (dict): The level of theory to use for conformer generation. opt_level (dict): The level of theory to use for geometry optimizations. freq_level (dict): The level of theory to use for frequency calculations. sp_level (dict): The level of theory to use for single point energy calculations. @@ -229,6 +231,7 @@ def __init__(self, project_directory: str, composite_method: Optional[Level] = None, conformer_level: Optional[Level] = None, + conf_generation_level: Optional[Level] = None, opt_level: Optional[Level] = None, freq_level: Optional[Level] = None, sp_level: Optional[Level] = None, @@ -308,6 +311,7 @@ def __init__(self, self.servers = list() self.composite_method = composite_method self.conformer_level = conformer_level + self.conf_generation_level = conf_generation_level self.ts_guess_level = ts_guess_level self.opt_level = opt_level self.freq_level = freq_level @@ -1098,7 +1102,10 @@ def run_conformer_jobs(self, labels: Optional[List[str]] = None): n_confs=n_confs, e_confs=self.e_confs, plot_path=os.path.join(self.project_directory, 'output', 'Species', - label, 'geometry', 'conformers')) + label, 'geometry', 'conformers'), + conf_generation_level=self.conf_generation_level if self.conf_generation_level is not None else None, + conf_path=os.path.join(self.project_directory, 'calcs', 'Species', f'{label}_multi'), + ) self.process_conformers(label) # TSs: elif self.species_dict[label].is_ts \ From 8d5ace0e66bf947f7b1111d50ac463bba01b5747 Mon Sep 17 00:00:00 2001 From: Jintao Date: Fri, 19 Jan 2024 12:40:08 +0800 Subject: [PATCH 03/11] generate_conformer in species.py considers solvent --- arc/species/species.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arc/species/species.py b/arc/species/species.py index 2ae0968a10..727aab738f 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1059,6 +1059,8 @@ def generate_conformers(self, n_confs: int = 10, e_confs: float = 5, plot_path: str = None, + conf_generation_level: dict = None, + conf_path: str = None, ) -> None: """ Generate conformers. @@ -1070,6 +1072,8 @@ def generate_conformers(self, (unique) generated conformers will be stored in the .conformers attribute. plot_path (str, optional): A folder path in which the plot will be saved. If None, the plot will not be shown (nor saved). + conf_generation_level (dict, optional): The level of theory to use for conformer generation. + conf_path (str, optional): A folder path in which the conformers will be saved. """ if self.is_ts: return @@ -1095,6 +1099,8 @@ def generate_conformers(self, return_all_conformers=False, plot_path=plot_path, diastereomers=diastereomers, + conf_generation_level=conf_generation_level, + conf_path=conf_path, ) if len(lowest_confs): self.conformers.extend([conf['xyz'] for conf in lowest_confs]) From d7ed9374ac853aef0339aeb1e4304302a81d6e91 Mon Sep 17 00:00:00 2001 From: Jintao Date: Mon, 4 Mar 2024 14:08:26 +0200 Subject: [PATCH 04/11] get_force_field_energies_solvation in conformers.py The new added function get_force_field_energies_solvation is created to find conformers considering solvation effect. It opens a subprocess to run multi_species opt job with solvent over the conformers from the RDkit conformers (without solvent) --- arc/species/conformers.py | 90 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 3 deletions(-) diff --git a/arc/species/conformers.py b/arc/species/conformers.py index d427435bf1..8b163914cc 100644 --- a/arc/species/conformers.py +++ b/arc/species/conformers.py @@ -38,6 +38,7 @@ import copy import logging +import os import sys import time from itertools import product @@ -53,16 +54,21 @@ from rmgpy.molecule.molecule import Atom, Bond, Molecule from rmgpy.molecule.element import C as C_ELEMENT, H as H_ELEMENT, F as F_ELEMENT, Cl as Cl_ELEMENT, I as I_ELEMENT -from arc.common import (convert_list_index_0_to_1, +from arc.common import (ARC_PATH, + convert_list_index_0_to_1, determine_top_group_indices, get_single_bond_length, generate_resonance_structures, logger, + save_yaml_file, + read_yaml_file, ) from arc.exceptions import ConformerError, InputError +from arc.job.local import execute_command import arc.plotter +from arc.level import Level from arc.species import converter, vectors - +from arc.parser import parse_xyz_from_file # The number of conformers to generate per range of heavy atoms in the molecule # (will be increased if there are chiral centers) @@ -164,6 +170,8 @@ def generate_conformers(mol_list: Union[List[Molecule], Molecule], return_all_conformers=False, plot_path=None, print_logs=True, + conf_generation_level=None, + conf_path=None, ) -> Optional[Union[list, Tuple[list, list]]]: """ Generate conformers for (non-TS) species starting from a list of RMG Molecules. @@ -195,6 +203,8 @@ def generate_conformers(mol_list: Union[List[Molecule], Molecule], If None, the plot will not be shown (nor saved). print_logs (bool, optional): Whether define a logger so logs are also printed to stdout. Useful when run outside of ARC. True to print. + conf_generation_level (str, optional): The level of conformer generation to perform. + conf_path (str, optional): A folder path in which the conformers will be saved. Raises: ConformerError: If something goes wrong. @@ -267,7 +277,7 @@ def generate_conformers(mol_list: Union[List[Molecule], Molecule], torsions, tops = determine_rotors(mol_list) conformers = generate_force_field_conformers( mol_list=mol_list, label=label, xyzs=xyzs, torsion_num=len(torsions), charge=charge, multiplicity=multiplicity, - num_confs=num_confs_to_generate, force_field=force_field) + num_confs=num_confs_to_generate, force_field=force_field, conf_generation_level=conf_generation_level, conf_path=conf_path) lowest_confs = list() if len(conformers): @@ -630,6 +640,8 @@ def generate_force_field_conformers(label, xyzs=None, num_confs=None, force_field='MMFF94s', + conf_generation_level=None, + conf_path=None, ) -> List[dict]: """ Generate conformers using RDKit and OpenBabel and optimize them using a force field @@ -644,6 +656,8 @@ def generate_force_field_conformers(label, multiplicity (int): The species spin multiplicity. num_confs (int, optional): The number of conformers to generate. force_field (str, optional): The type of force field to use. + conf_generation_level (str, optional): The level of conformer generation to perform. + conf_path (str, optional): A folder path in which the conformers will be saved. Raises: ConformerError: If xyzs is given and it is not a list, or its entries are not strings. @@ -673,6 +687,12 @@ def generate_force_field_conformers(label, except ValueError as e: logger.warning(f'Could not generate conformers for {label}, failed with: {e}') if ff_xyzs: + if conf_generation_level is not None and conf_generation_level.solvent is not None: + ff_xyzs, ff_energies = get_force_field_energies_solvation(label, + ff_xyzs, + conf_generation_level=conf_generation_level, + conf_path=conf_path, + ) for xyz, energy in zip(ff_xyzs, ff_energies): conformers.append({'xyz': xyz, 'index': len(conformers), @@ -1153,6 +1173,70 @@ def get_force_field_energies(label: str, return xyzs, energies +def get_force_field_energies_solvation(label: str, + ff_xyzs: list, + conf_generation_level: Level, + conf_path: str, + ) -> Tuple[list, list]: + """ + Determine force field energies with solvation effect using Gaussian. + + Args: + label (str): The species' label. + ff_xyzs (list): Entries are xyz coordinates in dict format. + conf_generation_level (dict): The level of solvation to use. + conf_path (str): A folder path in which the conformers will be saved. + + Returns: + Tuple[list, list]: + - Entries are xyz coordinates, each in a dict format. + - Entries are the FF energies (in kJ/mol). + """ + ARC_child_path = conf_path + content = dict() + content['project'] = f'conf_gen_multi_{label}' + content['opt_level'] = {'method': conf_generation_level.method, + 'solvation_method': conf_generation_level.solvation_method or 'SMD', + 'solvent': conf_generation_level.solvent, + } + content['report_e_elect'] = True + content['job_types'] = {'opt': True, + 'sp': False, + 'fine': False, + 'freq': False, + 'rotors': False, + 'conformers': False, + } + content['n_confs'] = 1 + content['compute_thermo'] = False + content['species'] = list() + for i, xyz in enumerate(ff_xyzs): + spc_dict = dict() + spc_dict['label'] = f'{label}_multi_{i}' + spc_dict['xyz'] = xyz + spc_dict['multi_species'] = f'{label}_multi' + content['species'].append(spc_dict) + save_yaml_file(path=os.path.join(ARC_child_path, 'input.yml'), content=content) + commands = ['source ~/.bashrc', 'conda activate arc_env', f'python {ARC_PATH}/ARC.py {ARC_child_path}/input.yml'] + command = '; '.join(commands) + stdout, stderr = execute_command(command=command, shell=True, executable='/bin/bash') + with open(os.path.join(ARC_child_path, 'stdout.txt'), 'w') as f: + stdout_str = '\n'.join(str(x) for x in stdout) + f.write(stdout_str) + with open(os.path.join(ARC_child_path, 'stderr.txt'), 'w') as f: + stderr_str = '\n'.join(str(x) for x in stderr) + f.write(stderr_str) + if len(stderr) > 0 or len(stdout) == 0: + logger.warning(f'Got the following error when trying to submit job:\n{stderr}.') + xyzs = list() + energies = list() + energy_dict = read_yaml_file(os.path.join(ARC_child_path, 'output', 'e_elect_summary.yml')) + for i in range(len(ff_xyzs)): + xyzs.append(parse_xyz_from_file(os.path.join(ARC_child_path, 'output', 'Species', f'{label}_multi_{i}', 'geometry', f'{label}_multi_{i}.xyz'))) + energies.append(energy_dict[f'{label}_multi_{i}']) + return xyzs, energies + + def openbabel_force_field_on_rdkit_conformers(label, rd_mol, force_field='MMFF94s', optimize=True): """ Optimize RDKit conformers by OpenBabel using a force field (MMFF94 or MMFF94s are recommended). From 9373102b9c54624541d9cc4e6102be2431280e0b Mon Sep 17 00:00:00 2001 From: Jintao Date: Mon, 22 Jan 2024 15:01:12 +0800 Subject: [PATCH 05/11] settings.py include default conf_generation level of theory --- arc/settings/settings.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arc/settings/settings.py b/arc/settings/settings.py index 5d5d86d5fe..20bea287ac 100644 --- a/arc/settings/settings.py +++ b/arc/settings/settings.py @@ -175,6 +175,9 @@ } default_levels_of_theory = {'conformer': 'wb97xd/def2svp', # it's recommended to choose a method with dispersion + 'conf_generation': {'method': 'UFF', + 'solvation_method': 'SMD', + 'solvent': 'water'}, # good default for Gaussian 'ts_guesses': 'wb97xd/def2svp', 'opt': 'wb97xd/def2tzvp', # good default for Gaussian # 'opt': 'wb97m-v/def2tzvp', # good default for QChem From 390855bc84d32b94d477e19cc7c3da0ac42933d4 Mon Sep 17 00:00:00 2001 From: Jintao Date: Mon, 4 Mar 2024 14:08:32 +0200 Subject: [PATCH 06/11] ARC.set_levels_of_theory in main.py considers conf_generation_level --- arc/main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arc/main.py b/arc/main.py index 22cdefc3be..5663b2a98e 100644 --- a/arc/main.py +++ b/arc/main.py @@ -970,6 +970,11 @@ def set_levels_of_theory(self): self.conformer_level = Level(repr=self.conformer_level) logger.info(f'Conformers:{default_flag} {self.conformer_level}') + if self.conf_generation_level is not None: + default_flag = '' + self.conf_generation_level = Level(repr=self.conf_generation_level) + logger.info(f'Conformers_generation:{default_flag} {self.conf_generation_level}') + if self.reactions or any([spc.is_ts for spc in self.species]): if not self.ts_guess_level: self.ts_guess_level = default_levels_of_theory['ts_guesses'] From b7939ec68afaa89a67c862836d49d64925399f3c Mon Sep 17 00:00:00 2001 From: Jintao Date: Mon, 4 Mar 2024 14:08:41 +0200 Subject: [PATCH 07/11] Prevent update of running_job list to empty one if it exists --- arc/scheduler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arc/scheduler.py b/arc/scheduler.py index 164579d8c0..d299a00be4 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -428,8 +428,9 @@ def __init__(self, if not self.job_types['conformers'] and len(species.conformers) == 1: # conformers weren't asked for, assign initial_xyz species.initial_xyz = species.conformers[0] - if species.label not in self.running_jobs: - self.running_jobs[species.label if not species.multi_species else species.multi_species] = list() + label = species.label if not species.multi_species else species.multi_species + if label not in self.running_jobs.keys(): + self.running_jobs[label] = list() if self.output[species.label]['convergence']: continue if species.is_monoatomic(): From 68ee6f09693b595a718d94756766af0458a6aebc Mon Sep 17 00:00:00 2001 From: Jintao Date: Sun, 25 Feb 2024 12:06:42 +0800 Subject: [PATCH 08/11] New conditions to decide which jobs to run for given xyzs --- arc/scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arc/scheduler.py b/arc/scheduler.py index d299a00be4..e6240e7eab 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -568,9 +568,9 @@ def schedule_jobs(self): self.determine_most_stable_conformer(label) # also checks isomorphism if self.species_dict[label].initial_xyz is not None: # if initial_xyz is None, then we're probably troubleshooting conformers, don't opt - if not self.composite_method: + if not self.composite_method and (self.job_types['opt'] or self.job_types['fine']): self.run_opt_job(label, fine=self.fine_only) - else: + elif self.composite_method and self.job_types['composite']: self.run_composite_job(label) self.timer = False break From f9997651d37e55360d1dd61989f2731bfb2a1f21 Mon Sep 17 00:00:00 2001 From: Jintao Date: Thu, 7 Mar 2024 09:12:22 +0200 Subject: [PATCH 09/11] Write conformer yaml file including xyz and energy Add log information for conformer with solvation spawned from the subprocess --- arc/species/conformers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arc/species/conformers.py b/arc/species/conformers.py index 8b163914cc..0a8efc2b4e 100644 --- a/arc/species/conformers.py +++ b/arc/species/conformers.py @@ -1230,10 +1230,14 @@ def get_force_field_energies_solvation(label: str, logger.warning(f'Got the following error when trying to submit job:\n{stderr}.') xyzs = list() energies = list() + content = dict() energy_dict = read_yaml_file(os.path.join(ARC_child_path, 'output', 'e_elect_summary.yml')) for i in range(len(ff_xyzs)): xyzs.append(parse_xyz_from_file(os.path.join(ARC_child_path, 'output', 'Species', f'{label}_multi_{i}', 'geometry', f'{label}_multi_{i}.xyz'))) energies.append(energy_dict[f'{label}_multi_{i}']) + content[f'{label}_multi_{i}'] = {'xyz': xyzs[-1], 'energy': energies[-1]} + save_yaml_file(path=os.path.join(ARC_child_path, 'output', 'energy_geometry_summary.yml'), content=content) + logger.info(f'{label} conformer with solvation effect are spawned from a subprocess.') return xyzs, energies From eba0dff8edc946115051db41e75e49d252bdd20b Mon Sep 17 00:00:00 2001 From: Jintao Date: Thu, 28 Mar 2024 10:01:19 +0200 Subject: [PATCH 10/11] Rewrite the log info for multi_species consideration process --- arc/main.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arc/main.py b/arc/main.py index 5663b2a98e..e8d7ae2557 100644 --- a/arc/main.py +++ b/arc/main.py @@ -575,15 +575,21 @@ def execute(self) -> dict: Status dictionary indicating which species converged successfully. """ logger.info('\n') + considered_list = list() for species in self.species: if not isinstance(species, ARCSpecies): raise ValueError(f'All species must be ARCSpecies objects. Got {type(species)}') if species.is_ts: logger.info(f'Considering transition state: {species.label}') else: - logger.info(f'Considering species: {species.label}') - if species.mol is not None: - display(species.mol.copy(deep=True)) + if not species.multi_species: + logger.info(f'Considering species: {species.label}') + if species.mol is not None: + display(species.mol.copy(deep=True)) + elif species.multi_species not in considered_list: + logger.info(f'Considering species: {species.multi_species}') + considered_list.append(species.multi_species) + logger.info('\n') for rxn in self.reactions: if not isinstance(rxn, ARCReaction): From efab85091d59f635576b4c1f3f3600af48aa61fe Mon Sep 17 00:00:00 2001 From: Jintao Date: Thu, 28 Mar 2024 10:03:20 +0200 Subject: [PATCH 11/11] Divide the multi_species cluster into smaller amount This allows us to launch more opt jobs with smaller amount of conformers individually and parallelly, to speed up the process. --- arc/species/conformers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arc/species/conformers.py b/arc/species/conformers.py index 0a8efc2b4e..8b787aee59 100644 --- a/arc/species/conformers.py +++ b/arc/species/conformers.py @@ -1210,11 +1210,12 @@ def get_force_field_energies_solvation(label: str, content['n_confs'] = 1 content['compute_thermo'] = False content['species'] = list() + species_per_job = 500 for i, xyz in enumerate(ff_xyzs): spc_dict = dict() spc_dict['label'] = f'{label}_multi_{i}' spc_dict['xyz'] = xyz - spc_dict['multi_species'] = f'{label}_multi' + spc_dict['multi_species'] = f'{label}_multi_cluster_{i//species_per_job}' content['species'].append(spc_dict) save_yaml_file(path=os.path.join(ARC_child_path, 'input.yml'), content=content) commands = ['source ~/.bashrc', 'conda activate arc_env', f'python {ARC_PATH}/ARC.py {ARC_child_path}/input.yml']