diff --git a/.github/workflows/cont_int.yml b/.github/workflows/cont_int.yml
index 20d57c6f82..31ce9c9f0a 100644
--- a/.github/workflows/cont_int.yml
+++ b/.github/workflows/cont_int.yml
@@ -144,7 +144,7 @@ jobs:
           key: conda-${{ runner.os }}--${{ runner.arch }}-arcenv-${{ env.CACHE_NUMBER}}
         env:
           # Increase this value to reset cache if etc/example-environment.yml has not changed
-          CACHE_NUMBER: 0
+          CACHE_NUMBER: 1
         id: cache-arc-env
       - name: Update environment
         run: mamba env update -n arc_env -f environment.yml
diff --git a/.gitignore b/.gitignore
index da67c571cc..4806aff914 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ scratch/
 
 # csv database
 *.csv
+!basis_sets.csv
 
 # iPython files
 *.ipynb_*
@@ -52,6 +53,10 @@ timer.dat
 
 # .vscode
 .vscode
 
+# files created via testing
+nul
+run.out
+
 # .trunk folder
 .trunk
diff --git a/arc/job/adapter.py b/arc/job/adapter.py
index 29d83fc97c..48b3330947 100644
--- a/arc/job/adapter.py
+++ b/arc/job/adapter.py
@@ -772,10 +772,13 @@ def set_cpu_and_mem(self):
                            f'exceeds {100 * job_max_server_node_memory_allocation}% of the the maximum node memory on '
                            f'{self.server}. Setting it to {job_max_server_node_memory_allocation * max_mem:.2f} GB.')
             self.job_memory_gb = job_max_server_node_memory_allocation * max_mem
-            total_submit_script_memory = self.job_memory_gb * 1024 * 1.05  # MB
+            total_submit_script_memory = self.job_memory_gb * 1024 * 1.05 if (self.job_memory_gb * 1024 * 1.05) <= (max_mem * 1024) else max_mem * 1024  # MB
             self.job_status[1]['keywords'].append('max_total_job_memory')  # Useful info when troubleshooting.
         else:
-            total_submit_script_memory = self.job_memory_gb * 1024 * 1.1  # MB
+            if max_mem is None:
+                total_submit_script_memory = self.job_memory_gb * 1024 * 1.1  # MB
+            else:
+                total_submit_script_memory = self.job_memory_gb * 1024 * 1.1 if (self.job_memory_gb * 1024 * 1.1) <= (max_mem * 1024) else max_mem * 1024  # MB
         # Determine amount of memory in submit script based on cluster job scheduling system.
         cluster_software = servers[self.server].get('cluster_soft').lower() if self.server is not None else None
         if cluster_software in ['oge', 'sge', 'htcondor']:
@@ -785,8 +788,8 @@ def set_cpu_and_mem(self):
             # In PBS, "#PBS -l select=1:ncpus=8:mem=12000000" specifies the memory for all cores to be 12 MB.
             self.submit_script_memory = math.ceil(total_submit_script_memory) * 1E6  # in Bytes
         elif cluster_software in ['slurm']:
-            # In Slurm, "#SBATCH --mem-per-cpu=2000" specifies the memory **per cpu/thread** to be 2000 MB.
-            self.submit_script_memory = math.ceil(total_submit_script_memory / self.cpu_cores)  # in MB
+            # In Slurm, "#SBATCH --mem=2000" specifies the memory to be 2000 MB.
+            self.submit_script_memory = math.ceil(total_submit_script_memory)  # in MB
         self.set_input_file_memory()
 
     def as_dict(self) -> dict:
@@ -942,18 +945,25 @@ def _get_additional_job_info(self):
         if cluster_soft in ['oge', 'sge', 'slurm', 'pbs', 'htcondor']:
             local_file_path_1 = os.path.join(self.local_path, 'out.txt')
             local_file_path_2 = os.path.join(self.local_path, 'err.txt')
-            local_file_path_3 = os.path.join(self.local_path, 'job.log')
-            if self.server != 'local' and self.remote_path is not None and not self.testing:
+            local_file_path_3 = None
+            for files in self.files_to_upload:
+                if 'job.sh' in files.values():
+                    local_file_path_3 = os.path.join(self.local_path, 'job.log')
+            if self.server != 'local' and self.remote_path is not None:
                 remote_file_path_1 = os.path.join(self.remote_path, 'out.txt')
                 remote_file_path_2 = os.path.join(self.remote_path, 'err.txt')
-                remote_file_path_3 = os.path.join(self.remote_path, 'job.log')
+                remote_file_path_3 = None
+                for files in self.files_to_upload:
+                    if 'job.sh' in files.values():
+                        remote_file_path_3 = os.path.join(self.remote_path, 'job.log')
                 with SSHClient(self.server) as ssh:
-                    for local_file_path, remote_file_path in zip([local_file_path_1,
-                                                                  local_file_path_2,
-                                                                  local_file_path_3],
-                                                                 [remote_file_path_1,
-                                                                  remote_file_path_2,
-                                                                  remote_file_path_3]):
+                    local_files_to_zip = [local_file_path_1, local_file_path_2]
+                    remote_files_to_zip = [remote_file_path_1, remote_file_path_2]
+                    if local_file_path_3 and remote_file_path_3:
+                        local_files_to_zip.append(local_file_path_3)
+                        remote_files_to_zip.append(remote_file_path_3)
+                    for local_file_path, remote_file_path in zip(local_files_to_zip, remote_files_to_zip):
                         try:
                             ssh.download_file(remote_file_path=remote_file_path,
                                               local_file_path=local_file_path)
@@ -963,10 +973,21 @@ def _get_additional_job_info(self):
                                            f'flags with stdout and stderr of out.txt and err.txt, respectively '
                                            f'(e.g., "#SBATCH -o out.txt"). Error message:')
                             logger.warning(e)
-            for local_file_path in [local_file_path_1, local_file_path_2, local_file_path_3]:
+            for local_file_path in [path for path in [local_file_path_1, local_file_path_2, local_file_path_3] if path]:
                 if os.path.isfile(local_file_path):
-                    with open(local_file_path, 'r') as f:
-                        lines = f.readlines()
+                    with open(local_file_path, 'rb') as f:
+                        # Read the file
+                        first_bytes = f.read()
+                    # Check if the bytes contain a null byte
+                    has_null_byte = b'\x00' in first_bytes
+                    # Use the appropriate mode based on whether the file is binary or not
+                    mode = 'rb' if has_null_byte else 'r'
+                    # Read the file contents using the determined mode
+                    lines = first_bytes.decode('utf-8')
+                    if mode == 'r':
+                        with open(local_file_path, 'r') as f:
+                            lines = f.readlines()
+                    content += ''.join([line for line in lines])
                     content += '\n'
         else:
@@ -1346,6 +1367,14 @@ def troubleshoot_server(self):
             if run_job:
                 # resubmit job
                 self.execute()
+
+    def remove_remote_files(self):
+        """
+        Remove the remote files.
+        """
+        if self.server != 'local' and self.server is not None:
+            with SSHClient(self.server) as ssh:
+                ssh.remove_dir(self.remote_path)
 
     def troubleshoot_queue(self) -> bool:
         """Troubleshoot queue errors.
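Note for reviewers: the adapter.py hunks above do two things — cap the submit-script memory at the node's physical maximum, and switch Slurm from per-CPU to total memory. A minimal sketch of the capping arithmetic (the helper name capped_submit_memory_mb is illustrative, not part of the PR):

import math
from typing import Optional

def capped_submit_memory_mb(job_memory_gb: float,
                            max_mem_gb: Optional[float],
                            overhead: float = 1.1) -> int:
    """Request the job memory plus an overhead factor,
    but never more than the node's maximum (when known)."""
    requested_mb = job_memory_gb * 1024 * overhead
    if max_mem_gb is not None:
        requested_mb = min(requested_mb, max_mem_gb * 1024)
    # With the Slurm change above, this value is written as "#SBATCH --mem=<MB>"
    # (total job memory), no longer divided by cpu_cores for --mem-per-cpu.
    return math.ceil(requested_mb)

# e.g., a 14 GB job on a 16 GB node: min(14 * 1024 * 1.1, 16 * 1024) -> 15770 MB after rounding up.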
diff --git a/arc/job/adapter_test.py b/arc/job/adapter_test.py
index a45ea74f53..e6fc82564f 100644
--- a/arc/job/adapter_test.py
+++ b/arc/job/adapter_test.py
@@ -354,7 +354,7 @@ def test_set_cpu_and_mem(self):
         self.job_4.cpu_cores = None
         self.job_4.set_cpu_and_mem()
         self.assertEqual(self.job_4.cpu_cores, 8)
-        expected_memory = math.ceil((14 * 1024 * 1.1) / self.job_4.cpu_cores)
+        expected_memory = math.ceil(14 * 1024 * 1.1)
         self.assertEqual(self.job_4.submit_script_memory, expected_memory)
         self.job_4.server = 'local'
diff --git a/arc/job/adapters/molpro.py b/arc/job/adapters/molpro.py
index 176570aa94..f6714196fc 100644
--- a/arc/job/adapters/molpro.py
+++ b/arc/job/adapters/molpro.py
@@ -9,6 +9,7 @@
 import os
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 import socket
+import re
 
 from mako.template import Template
 
@@ -156,6 +157,7 @@ def __init__(self,
         self.execution_type = execution_type or 'queue'
         self.command = 'molpro'
         self.url = 'https://www.molpro.net/'
+        self.core_change = None
 
         if species is None:
             raise ValueError('Cannot execute Molpro without an ARCSpecies object.')
@@ -326,7 +328,7 @@ def set_input_file_memory(self) -> None:
         Set the input_file_memory attribute.
         """
         # Molpro's memory is per cpu core and in MW (mega word; 1000 MW = 7.45 GB on a 64-bit machine)
-        # The conversion from mW to GB was done using this (https://deviceanalytics.com/words-to-bytes-converter/)
+        # The conversion from mW to GB was done using this (c)
         # specifying a 64-bit architecture.
         #
         # See also:
@@ -335,8 +337,37 @@ def set_input_file_memory(self) -> None:
         #     800,000,000 bytes (800 mb).
         #     Formula - (100,000,000 [Words]/( 800,000,000 [Bytes] / (job mem in gb * 1000,000,000 [Bytes])))/ 1000,000 [Words -> MegaWords]
         # The division by 1E6 is for converting into MWords
-        # Due to Zeus's configuration, there is only 1 nproc so the memory should not be divided by cpu_cores.
+        # Due to Zeus's configuration, there is only 1 nproc so the memory should not be divided by cpu_cores.
         self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores)) if 'zeus' not in socket.gethostname() else math.ceil(self.job_memory_gb / (7.45e-3))
+        # We need to check whether 'cpu' and a 'molpro_memory:' entry exist in ess_trsh_methods.
+        # If they do, we need to reduce the cpu_cores.
+        if self.ess_trsh_methods is not None:
+            if 'cpu' in self.ess_trsh_methods and any('molpro_memory:' in method for method in self.ess_trsh_methods):
+                current_cpu_cores = self.cpu_cores
+                max_memory = self.job_memory_gb
+                memory_values = []
+                for item in self.ess_trsh_methods:
+                    if 'molpro_memory:' in item:
+                        memory_value = item.split('molpro_memory:')[1]
+                        memory_values.append(float(memory_value))
+
+                if memory_values:
+                    min_memory_value = min(memory_values)
+                    required_cores = math.floor(max_memory / (min_memory_value * 7.45e-3))
+                    if self.core_change is None:
+                        self.core_change = required_cores
+                    elif self.core_change == required_cores:
+                        # We have already tried this; reduce the cores by 1.
+                        required_cores -= 1
+                    if required_cores < current_cpu_cores:
+                        self.cpu_cores = required_cores
+                        logger.info(f'Changing the number of cpu_cores from {current_cpu_cores} to {self.cpu_cores}')
+                    self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores)) if 'zeus' not in socket.gethostname() else math.ceil(self.job_memory_gb / (7.45e-3))
 
     def execute_incore(self):
         """
diff --git a/arc/job/adapters/molpro_test.py b/arc/job/adapters/molpro_test.py
index c73aeaa974..5690af8011 100644
--- a/arc/job/adapters/molpro_test.py
+++ b/arc/job/adapters/molpro_test.py
@@ -141,7 +141,7 @@ def test_set_files(self):
                                   'source': 'path',
                                   'make_x': False},
                                  ]
-        job_1_files_to_download = [{'file_name': 'input.out',
+        job_1_files_to_download = [{'file_name': 'output.out',
                                     'local': os.path.join(self.job_1.local_path, output_filenames[self.job_1.job_adapter]),
                                     'remote': os.path.join(self.job_1.remote_path, output_filenames[self.job_1.job_adapter]),
                                     'source': 'path',
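The core-reduction logic above follows from Molpro's per-core memory model (1 MWord ≈ 7.45e-3 GB on a 64-bit machine). A standalone sketch of the arithmetic, under that assumption (the function name cores_for_memory is hypothetical):

import math

MW_TO_GB = 7.45e-3  # 1 MWord ~= 7.45e-3 GB on a 64-bit machine

def cores_for_memory(job_memory_gb: float, required_mw_per_core: float) -> int:
    """How many cores can share job_memory_gb while still giving each core
    at least required_mw_per_core MWords (the figure Molpro asked for)."""
    return math.floor(job_memory_gb / (required_mw_per_core * MW_TO_GB))

# e.g., a 32 GB job where Molpro asked for 1000 MW per core:
# 32 / (1000 * 7.45e-3) -> floor(4.295) = 4 cores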
diff --git a/arc/job/adapters/qchem.py b/arc/job/adapters/qchem.py
index 1e1cfca19e..d7af433c78 100644
--- a/arc/job/adapters/qchem.py
+++ b/arc/job/adapters/qchem.py
@@ -8,9 +8,10 @@
 import os
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
+import pandas as pd
 from mako.template import Template
 
-from arc.common import get_logger, torsions_to_scans
+from arc.common import get_logger, torsions_to_scans, ARC_PATH
 from arc.imports import incore_commands, settings
 from arc.job.adapter import JobAdapter
 from arc.job.adapters.common import (_initialize_adapter,
@@ -24,6 +25,8 @@
 from arc.species.converter import xyz_to_str
 from arc.species.vectors import calculate_dihedral_angle
 
+from rapidfuzz import process, utils
+
 if TYPE_CHECKING:
     from arc.reaction import ARCReaction
     from arc.species import ARCSpecies
@@ -37,7 +40,7 @@
     settings['output_filenames'], settings['servers'], settings['submit_filenames']
 
-# job_type_1: 'opt', 'ts', 'sp', 'freq'.
+# job_type_1: 'opt', 'ts', 'sp', 'freq', 'irc'.
 # job_type_2: reserved for 'optfreq'.
 # fine: '\n   GEOM_OPT_TOL_GRADIENT 15\n   GEOM_OPT_TOL_DISPLACEMENT 60\n   GEOM_OPT_TOL_ENERGY 5\n   XC_GRID SG-3'
 # unrestricted: 'False' or 'True' for restricted / unrestricted
@@ -49,7 +52,8 @@
    JOBTYPE       ${job_type_1}
    METHOD        ${method}
    UNRESTRICTED  ${unrestricted}
-   BASIS         ${basis}${fine}${keywords}${constraint}${scan_trsh}${block}
+   BASIS         ${basis}${fine}${keywords}${constraint}${scan_trsh}${trsh}${block}${irc}
+   IQMOL_FCHK    FALSE
 $end
 ${job_type_2}
 ${scan}
@@ -211,20 +215,31 @@ def write_input_file(self) -> None:
                     'block',
                     'scan',
                     'constraint',
+                    'irc',
                     ]:
             input_dict[key] = ''
-        input_dict['basis'] = self.level.basis or ''
+        input_dict['basis'] = self.software_input_matching(basis=self.level.basis) if self.level.basis else ''
         input_dict['charge'] = self.charge
         input_dict['method'] = self.level.method
+        # If the method ends with D3, remove it and add D3 as a keyword. Need to account for '-D3' as well.
+        if input_dict['method'].endswith('d3') or input_dict['method'].endswith('-d3'):
+            input_dict['method'] = input_dict['method'][:-2]
+            # Remove the hyphen if it exists.
+            if input_dict['method'].endswith('-'):
+                input_dict['method'] = input_dict['method'][:-1]
+            # DFT_D - FALSE, EMPIRICAL_GRIMME, EMPIRICAL_CHG, D3_ZERO, D3_BJ, D3_CSO, D3_ZEROM, D3_BJM, D3_OP, D3 [Default: None]
+            # TODO: Add support for other D3 options. Check if the user has specified a D3 option in the level of theory.
+            input_dict['keywords'] = "\n   DFT_D D3"
         input_dict['multiplicity'] = self.multiplicity
         input_dict['scan_trsh'] = self.args['trsh']['scan_trsh'] if 'scan_trsh' in self.args['trsh'].keys() else ''
+        input_dict['trsh'] = self.args['trsh']['trsh'] if 'trsh' in self.args['trsh'].keys() else ''
         input_dict['xyz'] = xyz_to_str(self.xyz)
         # In QChem the attribute is called "unrestricted", so the logic is in reverse than in other adapters
-        input_dict['unrestricted'] = 'True' if not is_restricted(self) else 'False'
+        input_dict['unrestricted'] = 'TRUE' if not is_restricted(self) else 'FALSE'
 
         # Job type specific options
-        if self.job_type in ['opt', 'conformers', 'optfreq', 'orbitals', 'scan']:
+        if self.job_type in ['opt', 'conformers', 'optfreq', 'orbitals']:
             input_dict['job_type_1'] = 'ts' if self.is_ts else 'opt'
             if self.fine:
                 input_dict['fine'] = '\n   GEOM_OPT_TOL_GRADIENT 15' \
@@ -234,7 +249,7 @@ def write_input_file(self) -> None:
                 # Use a fine DFT grid, see 4.4.5.2 Standard Quadrature Grids, in
                 # http://www.q-chem.com/qchem-website/manual/qchem50_manual/sect-DFT.html
                 input_dict['fine'] += '\n   XC_GRID 3'
-
+
         elif self.job_type == 'freq':
             input_dict['job_type_1'] = 'freq'
@@ -257,10 +272,11 @@ def write_input_file(self) -> None:
                                     f"\n$end\n"
 
         elif self.job_type == 'scan':
+            input_dict['job_type_1'] = 'pes_scan'
+            if self.fine:
+                input_dict['fine'] += '\n   XC_GRID 3'
             scans = list()
-            if self.rotor_index is not None:
-                if self.species[0].rotors_dict \
-                        and self.species[0].rotors_dict[self.rotor_index]['directed_scan_type'] == 'ess':
+            if self.rotor_index is not None and self.species[0].rotors_dict:
                 scans = self.species[0].rotors_dict[self.rotor_index]['scan']
                 scans = [scans] if not isinstance(scans[0], list) else scans
             elif self.torsions is not None and len(self.torsions):
@@ -269,16 +285,81 @@ def write_input_file(self) -> None:
             for scan in scans:
                 dihedral_1 = int(calculate_dihedral_angle(coords=self.xyz, torsion=scan, index=1))
                 scan_atoms_str = ' '.join([str(atom_index) for atom_index in scan])
-                scan_string += f'tors {scan_atoms_str} {dihedral_1} {dihedral_1 + 360.0} {self.scan_res}\n'
-            scan_string += '$end\n'
+
+                # QChem requires that the scanning angle is between -180 and 180.
+                # Therefore, to ensure that the scan is symmetric, we may need to create a multi-job input file.
+                # For example, if the scan starts at 48 degrees with a resolution of 8 degrees, the input file will be:
+                #
+                # $molecule
+                #   molecule info
+                # $end
+                # $rem
+                #   input_dict['block']
+                # $end
+                # $scan
+                #   tors scan_atoms_str 48 176 8
+                # $end
+                # @@@
+                # $molecule
+                #   read
+                # $end
+                # $rem
+                #   input_dict['block']
+                #   SCF_GUESS read
+                # $end
+                # $scan
+                #   tors scan_atoms_str -176 40 8
+                # $end
+
+                scan_start, scan_end = self.generate_scan_angles(dihedral_1, self.scan_res)
+                scan_string += f'tors {scan_atoms_str} {scan_start} {scan_end} {self.scan_res}\n'
+            scan_string += '$end\n'
             if self.torsions is None or not len(self.torsions):
                 self.torsions = torsions_to_scans(scans, direction=-1)
+            input_dict['scan'] = scan_string
 
         elif self.job_type == 'irc':
             if self.fine:
-                # Note that the Acc2E argument is not available in Gaussian03
-                input_dict['fine'] = 'scf=(direct) integral=(grid=ultrafine, Acc2E=12)'
-            input_dict['job_type_1'] = f'irc=(CalcAll, {self.irc_direction}, maxpoints=50, stepsize=7)'
+                # Need to ensure that the grid is fine enough for the IRC.
+                input_dict['fine'] += '\n   XC_GRID 3'
+            # input_dict['job_type_1'] = 'rpath'
+            # # IRC variables are:
+            # # RPATH_COORDS - 0 for mass-weighted [Default], 1 for cartesian, 2 for z-matrix
+            # # RPATH_DIRECTION - 1 to descend in the positive direction of the eigen mode [Default],
+            # #   -1 to ascend in the negative direction of the eigen mode.
+            # # RPATH_MAX_CYCLES - Maximum number of cycles to perform. [Default: 20]
+            # # RPATH_MAX_STEPSIZE - Specifies the maximum step size to be taken (in 0.001 a.u.). [Default: 150 -> 0.15 a.u.]
+            # # RPATH_TOL_DISPLACEMENT - Specifies the convergence threshold for the step. If a step size is chosen by the
+            # #   algorithm that is smaller than this, the path is deemed to have reached the minimum. [Default: 5000 -> 0.005 a.u.]
+            # # RPATH_PRINT - Specifies the print level. [Default: 2] Higher values give less output.
+            # if self.irc_direction == 'forward':
+            #     irc_direction_value = 1
+            # elif self.irc_direction == 'reverse':
+            #     irc_direction_value = -1
+            # input_dict['irc'] = "\n   RPATH_COORDS 1" \
+            #                     f"\n   RPATH_DIRECTION {irc_direction_value}" \
+            #                     "\n   RPATH_MAX_CYCLES 20" \
+            #                     "\n   RPATH_MAX_STEPSIZE 150" \
+            #                     "\n   RPATH_TOL_DISPLACEMENT 5000" \
+            #                     "\n   RPATH_PRINT 2" \
+            #                     f"\n   SCF_GUESS read"
+            input_dict['job_type_1'] = 'freq'
+            if self.irc_direction == 'forward':
+                irc_direction_value = 1
+            elif self.irc_direction == 'reverse':
+                irc_direction_value = -1
+            input_dict['job_type_2'] = f"\n\n@@@\n$molecule\nread\n$end\n$rem" \
+                                       f"\n   JOBTYPE rpath" \
+                                       f"\n   BASIS {input_dict['basis']}" \
+                                       f"\n   METHOD {input_dict['method']}" \
+                                       f"\n   RPATH_DIRECTION {irc_direction_value}" \
+                                       "\n   RPATH_MAX_CYCLES 20" \
+                                       "\n   RPATH_MAX_STEPSIZE 150" \
+                                       "\n   RPATH_TOL_DISPLACEMENT 5000" \
+                                       "\n   RPATH_PRINT 2" \
+                                       f"\n   SCF_GUESS read" \
+                                       f"\n$end\n"
 
         if self.constraints:
             input_dict['constraint'] = '\n    CONSTRAINT\n'
@@ -287,11 +368,130 @@ def write_input_file(self) -> None:
                 constraint_atom_indices = ' '.join([str(atom_index) for atom_index in constraint_tuple[0]])
                 input_dict['constraint'] = f"    {constraint_type} {constraint_atom_indices} {constraint_tuple[1]:.2f}"
             input_dict['constraint'] += ' ENDCONSTRAINT\n'
+
+        if self.job_type in ['opt', 'pes_scan', 'freq', 'ts']:
+            # https://manual.q-chem.com/latest/Ch4.S5.SS2.html
+            # 5 For single point energy calculations (including BSSE and XSAPT jobs)
+            # 7 For job types NMR, STATPOLAR, DYNPOLAR, HYPERPOLAR, and ISSC
+            # 8 For most other job types, including geometry optimization, transition-state search, vibrational analysis,
+            #   CIS/TDDFT calculations, correlated wavefunction methods, energy decomposition analysis (EDA2), etc.
+            # OPTIONS:
+            #   n Corresponding to 10^-n
+            # RECOMMENDATION:
+            #   Tighter criteria for geometry optimization and vibration analysis.
+            #   Larger values provide more significant figures, at greater computational cost.
+            SCF_CONVERGENCE = 8
+            input_dict['keywords'] += f"\n   SCF_CONVERGENCE {SCF_CONVERGENCE}"
 
         input_dict = update_input_dict_with_args(args=self.args, input_dict=input_dict)
         with open(os.path.join(self.local_path, input_filenames[self.job_adapter]), 'w') as f:
             f.write(Template(input_template).render(**input_dict))
 
+    def generate_qchem_scan_angles(self, start_angle: int, step: int) -> Tuple[int, int, int, int]:
+        """
+        Generate the angles for a Q-Chem scan. The scan is split into two parts:
+        one from start_angle to 180, and one from -180 to end_angle.
+
+        Parameters
+        ----------
+        start_angle : int
+            The starting angle for the scan
+        step : int
+            The step size for the scan
+
+        Returns
+        -------
+        scan1_start : int
+            The starting angle for the first part of the scan
+        scan1_end : int
+            The ending angle for the first part of the scan
+        scan2_start : int
+            The starting angle for the second part of the scan
+        scan2_end : int
+            The ending angle for the second part of the scan
+        """
+        # First, check that start_angle is within the range of -180 to 180, and if not, convert it to be within that range.
+        if start_angle > 180:
+            start_angle = start_angle - 360
+
+        # This sets the end angle but does not take into account the limit of -180 to 180.
+        end_angle = start_angle - step
+
+        # This function wraps scan2_start within the range of -180 to 180.
+        wrap_within_range = lambda number, addition: (number + addition) % 360 - 360 if (number + addition) % 360 > 180 else (number + addition) % 360
+
+        # This function converts the angles to be within the range of -180 to 180.
+        convert_angle = lambda angle: angle % 360 if angle >= 0 else (angle % 360 if angle <= -180 else (angle % 360) - 360)
+
+        # Convert the angles to be within the range of -180 to 180.
+        start_angle = convert_angle(start_angle)
+        end_angle = convert_angle(end_angle)
+
+        if start_angle == 0 and end_angle == 0:
+            scan1_start = start_angle
+            scan1_end = 180
+            scan2_start = -180
+            scan2_end = end_angle
+        elif start_angle == 180:
+            # This is a special case because the scan would run from 180 to 180,
+            # which is not allowed in Q-Chem, so we split it into two scans.
+            # Arguably this could be done in one scan but it is easier to do it this way.
+            # Find the starting angle that, when incremented by the step size, reaches 180.
+            target_sum = 180
+            quotient = target_sum // step
+            starting_number = target_sum - (quotient * step)
+            scan1_start = starting_number
+            scan1_end = 180
+            scan2_start = -180
+            scan2_end = scan1_start - step
+        elif start_angle <= end_angle:
+            scan1_start = start_angle
+            scan1_end = start_angle + (step * ((180 - start_angle) // step))
+            scan2_start = convert_angle(scan1_end)
+            scan2_end = end_angle
+        elif (start_angle + step) > 180:
+            # This is a special case because the first scan would run from, e.g., 178 to 178.
+            # Therefore, make it a single scan ending at the start angle.
+            scan1_end = start_angle
+            scan1_start = wrap_within_range(scan1_end, step)
+            scan2_start = 0
+            scan2_end = 0
+        else:
+            scan1_start = start_angle
+            scan1_end = start_angle + (step * ((180 - start_angle) // step))
+            scan2_start = wrap_within_range(scan1_end, step)
+            scan2_end = end_angle
+
+        if scan2_start == scan2_end:
+            scan2_start = 0
+            scan2_end = 0
+
+        return int(scan1_start), int(scan1_end), int(scan2_start), int(scan2_end)
+
+    def generate_scan_angles(self, req_angle: int, step: int) -> Tuple[int, int]:
+        """
+        Generate the start and end angles for a Q-Chem scan, such that the requested
+        angle lies on the scan grid and the range stays within -180 to 180.
+        """
+        # Convert the requested angle if it is greater than 180.
+        if req_angle > 180:
+            req_angle = req_angle - 360
+
+        # This function converts the angles to be within the range of -180 to 180.
+        convert_angle = lambda angle: angle % 360 if angle >= 0 else (angle % 360 if angle <= -180 else (angle % 360) - 360)
+
+        req_angle = convert_angle(req_angle)
+
+        start_angle = -180
+        end_angle = 180
+
+        new_start_angle = req_angle
+        while new_start_angle - step >= start_angle:
+            new_start_angle -= step
+
+        new_end_angle = req_angle
+        while new_end_angle + step <= end_angle:
+            new_end_angle += step
+
+        return new_start_angle, new_end_angle
 
     def set_files(self) -> None:
         """
@@ -335,7 +535,7 @@ def set_files(self) -> None:
             self.files_to_download.append(self.get_file_property_dictionary(
                 file_name=output_filenames[self.job_adapter]))
             # 2.3. checkfile
-            self.files_to_download.append(self.get_file_property_dictionary(file_name='orbitals.fchk'))
+            # self.files_to_download.append(self.get_file_property_dictionary(file_name='input.fchk'))
 
     def set_additional_file_paths(self) -> None:
         """
@@ -369,6 +569,52 @@ def execute_queue(self):
         Execute a job to the server's queue.
         """
         self.legacy_queue_execution()
+
+    def software_input_matching(self, basis: str) -> str:
+        """
+        Check whether the user-specified basis set is supported by QChem. If not, try to match it
+        to a similar one. If no match is found, raise an error.
+
+        Matching is done by comparing the user-specified basis set against the basis_sets.csv file.
+
+        Raises:
+            ValueError: If the basis set is not compatible with QChem.
+        """
+        # Read the DataFrame of software basis sets.
+        software_methods = pd.read_csv(os.path.join(ARC_PATH, 'data', 'basis_sets.csv'))
+
+        # The first column is the software, the second column is the basis set, and the third column is the description.
+        # If the software set by the user is in the DataFrame, filter the DataFrame to only include that software,
+        # then check whether the basis set is in the DataFrame. If not, attempt a fuzzy match of the basis set.
+
+        # Matching pattern for basis set - not spelling errors
+        # pattern = r'[-_a-zA-Z]+'
+
+        lowercase_software = [row.lower() for row in software_methods['software'].values]
+        if 'qchem' in lowercase_software:
+            software_methods = software_methods[software_methods['software'] == 'qchem']
+            if basis in software_methods['basis_set'].values:
+                return basis
+            else:
+                basis_match = process.extract(basis, software_methods['basis_set'].values, processor=utils.default_process, score_cutoff=99)
+                # ratio = fuzz.WRatio(basis, software_methods['basis_set'].values, regex=pattern)
+                if len(basis_match) > 1:
+                    raise ValueError(f"Cannot match basis in qchem: {basis} as there are too many matches. Please check the basis set.")
+                elif len(basis_match) == 0:
+                    # If a hyphen exists, remove it and try to match again.
+                    basis = basis.replace('-', '')
+                    # Loop that puts a hyphen in different places in the basis set and tries to match again.
+                    # If it still doesn't match, raise an error.
+                    for i in range(1, len(basis)):
+                        basis_match = process.extract(basis[:i] + '-' + basis[i:], software_methods['basis_set'].values, processor=utils.default_process, score_cutoff=99)
+                        if len(basis_match) == 1:
+                            break
+                    if len(basis_match) == 0:
+                        raise ValueError(f"Unsupported basis in qchem: {basis}. Please check the basis set.")
+                logger.debug(f"Changing basis set from {basis} to {basis_match[0][0]} to match qchem")
+                basis = basis_match[0][0]
+                return basis
 
 register_job_adapter('qchem', QChemAdapter)
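The pes_scan change above wraps the requested dihedral into QChem's allowed [-180, 180] window and then extends the grid symmetrically in steps. A standalone sketch of the idea behind generate_scan_angles (free-function form, names illustrative):

def wrap_angle(angle: float) -> float:
    """Map any angle onto the (-180, 180] window that QChem accepts."""
    angle = angle % 360
    return angle - 360 if angle > 180 else angle

def scan_range(req_angle: float, step: float):
    """Widest grid within [-180, 180] that contains req_angle on a multiple of step."""
    req_angle = wrap_angle(req_angle)
    start = req_angle
    while start - step >= -180:
        start -= step
    end = req_angle
    while end + step <= 180:
        end += step
    return start, end

# scan_range(48, 8) -> (-176, 176): a single "tors ... -176 176 8" line covers the rotor.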
diff --git a/arc/job/adapters/qchem_test.py b/arc/job/adapters/qchem_test.py
new file mode 100644
index 0000000000..4a02996bc9
--- /dev/null
+++ b/arc/job/adapters/qchem_test.py
@@ -0,0 +1,361 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+
+"""
+This module contains unit tests of the arc.job.adapters.qchem module
+"""
+
+import math
+import os
+import shutil
+import unittest
+
+from arc.common import ARC_PATH
+from arc.job.adapters.qchem import QChemAdapter
+from arc.level import Level
+from arc.settings.settings import input_filenames, output_filenames, servers, submit_filenames
+from arc.species import ARCSpecies
+
+
+class TestQChemAdapter(unittest.TestCase):
+    """
+    Contains unit tests for the QChemAdapter class.
+    """
+    @classmethod
+    def setUpClass(cls):
+        """
+        A method that is run before all unit tests in this class.
+        """
+        cls.maxDiff = None
+        cls.job_1 = QChemAdapter(execution_type='incore',
+                                 job_type='conformers',  # Changed from 'composite' to 'conformers' - no equivalent in QChem AFAIK
+                                 level=Level(software='qchem',
+                                             method='b3lyp',
+                                             basis='def2-TZVP'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
+                                 testing=True,
+                                 )
+        cls.job_2 = QChemAdapter(execution_type='queue',
+                                 job_type='opt',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP',
+                                             solvation_method='SMD',
+                                             solvent='Water'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='spc1', xyz=['O 0 0 1']),
+                                          ARCSpecies(label='spc2', xyz=['O 0 0 2'])],
+                                 testing=True,
+                                 )
+        cls.job_3 = QChemAdapter(execution_type='queue',
+                                 job_type='opt',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP',
+                                             solvation_method='SMD',
+                                             solvent='Water'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
+                                 testing=True,
+                                 )
+        spc_4 = ARCSpecies(label='ethanol', xyz=["""C       1.1658210  -0.4043550   0.0000000
+                                                    C       0.0000000   0.5518050   0.0000000
+                                                    O      -1.1894600  -0.2141940   0.0000000
+                                                    H      -1.9412580   0.3751850   0.0000000
+                                                    H       2.1054020   0.1451160   0.0000000
+                                                    H       1.1306240  -1.0387850   0.8830320
+                                                    H       1.1306240  -1.0387850  -0.8830320
+                                                    H       0.0476820   1.1930570   0.8835910
+                                                    H       0.0476820   1.1930570  -0.8835910"""],
+                           directed_rotors={'brute_force_sp': [[1, 2], [2, 3]]})
+        spc_4.determine_rotors()  # also calls initialize_directed_rotors()
+        cls.job_4 = QChemAdapter(execution_type='queue',
+                                 job_type='scan',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[spc_4],
+                                 rotor_index=0,
+                                 testing=True,
+                                 )
+        cls.job_5 = QChemAdapter(execution_type='queue',
+                                 job_type='freq',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='birad singlet',
+                                                     xyz=['O 0 0 1'],
+                                                     multiplicity=1,
+                                                     number_of_radicals=2)],
+                                 testing=True,
+                                 )
+        cls.job_6 = QChemAdapter(execution_type='queue',
+                                 job_type='optfreq',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='anion', xyz=['O 0 0 1'], charge=-1, is_ts=False)],
+                                 testing=True,
+                                 )
+        cls.job_7 = QChemAdapter(execution_type='queue',
+                                 job_type='irc',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='IRC', xyz=['O 0 0 1'], is_ts=True)],
+                                 irc_direction='reverse',
+                                 testing=True,
+                                 )
+        cls.job_8 = QChemAdapter(execution_type='queue',
+                                 job_type='composite',
+                                 level=Level(method='cbs-qb3-paraskevas'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
+                                 testing=True,
+                                 args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
+                                 )
+        cls.job_9 = QChemAdapter(execution_type='local',
+                                 job_type='optfreq',
+                                 level=Level(method='wb97x-d',
+                                             basis='def2-TZVP'),
+                                 project='test',
+                                 project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'),
+                                 species=[ARCSpecies(label='anion', xyz=['O 0 0 1'], charge=-1, is_ts=False)],
+                                 testing=True,
+                                 )
+
+    def test_set_cpu_and_mem(self):
+        """Test assigning number of cpu's and memory"""
+        self.job_8.input_file_memory = None
+        self.job_8.submit_script_memory = None
+        self.job_8.server = 'server2'
+        self.job_8.set_cpu_and_mem()
+        self.assertEqual(self.job_8.cpu_cores, 8)
+
+    def test_set_input_file_memory(self):
+        """
+        Test setting the input_file_memory argument.
+        QChem manages its own memory, so this should be None for the time being.
+        https://manual.q-chem.com/5.4/CCparallel.html
+
+        A discussion is to be had about better manipulation of assigning memory to QChem jobs.
+        """
+        self.assertEqual(self.job_1.input_file_memory, None)
+        self.assertEqual(self.job_2.input_file_memory, None)
+
+    def test_write_input_file(self):
+        """Test writing QChem input files"""
+        self.job_1.write_input_file()
+        with open(os.path.join(self.job_1.local_path, input_filenames[self.job_1.job_adapter]), 'r') as f:
+            content_1 = f.read()
+        job_1_expected_input_file = """$molecule
+0 3
+O       0.00000000    0.00000000    1.00000000
+$end
+$rem
+   JOBTYPE       opt
+   METHOD        b3lyp
+   UNRESTRICTED  TRUE
+   BASIS         def2-TZVP
+   IQMOL_FCHK    FALSE
+$end
+
+
+
+"""
+        self.assertEqual(content_1, job_1_expected_input_file)
+
+        self.job_3.write_input_file()
+        with open(os.path.join(self.job_3.local_path, input_filenames[self.job_3.job_adapter]), 'r') as f:
+            content_3 = f.read()
+        job_3_expected_input_file = """$molecule
+0 3
+O       0.00000000    0.00000000    1.00000000
+$end
+$rem
+   JOBTYPE       opt
+   METHOD        wb97x-d
+   UNRESTRICTED  TRUE
+   BASIS         def2-TZVP
+   SCF_CONVERGENCE 8
+   IQMOL_FCHK    FALSE
+$end
+
+
+
+"""
+        self.assertEqual(content_3, job_3_expected_input_file)
+
+        self.job_4.write_input_file()
+        with open(os.path.join(self.job_4.local_path, input_filenames[self.job_4.job_adapter]), 'r') as f:
+            content_4 = f.read()
+        job_4_expected_input_file = """$molecule
+0 1
+C       1.16582100   -0.40435500    0.00000000
+C       0.00000000    0.55180500    0.00000000
+O      -1.18946000   -0.21419400    0.00000000
+H      -1.94125800    0.37518500    0.00000000
+H       2.10540200    0.14511600    0.00000000
+H       1.13062400   -1.03878500    0.88303200
+H       1.13062400   -1.03878500   -0.88303200
+H       0.04768200    1.19305700    0.88359100
+H       0.04768200    1.19305700   -0.88359100
+$end
+$rem
+   JOBTYPE       pes_scan
+   METHOD        wb97x-d
+   UNRESTRICTED  FALSE
+   BASIS         def2-TZVP
+   IQMOL_FCHK    FALSE
+$end
+
+
+$scan
+tors 5 1 2 3 -180.0 180 8.0
+$end
+
+
+"""
+        self.assertEqual(content_4, job_4_expected_input_file)
+
+        self.job_5.write_input_file()
+        with open(os.path.join(self.job_5.local_path, input_filenames[self.job_5.job_adapter]), 'r') as f:
+            content_5 = f.read()
+        job_5_expected_input_file = """$molecule
+0 1
+O       0.00000000    0.00000000    1.00000000
+$end
+$rem
+   JOBTYPE       freq
+   METHOD        wb97x-d
+   UNRESTRICTED  TRUE
+   BASIS         def2-TZVP
+   SCF_CONVERGENCE 8
+   IQMOL_FCHK    FALSE
+$end
+
+
+
+"""
+        self.assertEqual(content_5, job_5_expected_input_file)
+
+        self.job_6.write_input_file()
+        with open(os.path.join(self.job_6.local_path, input_filenames[self.job_6.job_adapter]), 'r') as f:
+            content_6 = f.read()
+        job_6_expected_input_file = """$molecule
+-1 2
+O       0.00000000    0.00000000    1.00000000
+$end
+$rem
+   JOBTYPE       opt
+   METHOD        wb97x-d
+   UNRESTRICTED  TRUE
+   BASIS         def2-TZVP
+   IQMOL_FCHK    FALSE
+$end
+
+
+
+"""
+        self.assertEqual(content_6, job_6_expected_input_file)
+
+        self.job_7.write_input_file()
+        with open(os.path.join(self.job_7.local_path, input_filenames[self.job_7.job_adapter]), 'r') as f:
+            content_7 = f.read()
+        job_7_expected_input_file = """$molecule
+0 1
+O       0.00000000    0.00000000    1.00000000
+$end
+$rem
+   JOBTYPE       freq
+   METHOD        wb97x-d
+   UNRESTRICTED  FALSE
+   BASIS         def2-TZVP
+   IQMOL_FCHK    FALSE
+$end
+
+
+@@@
+$molecule
+read
+$end
+$rem
+   JOBTYPE rpath
+   BASIS def2-TZVP
+   METHOD wb97x-d
+   RPATH_DIRECTION -1
+   RPATH_MAX_CYCLES 20
+   RPATH_MAX_STEPSIZE 150
+   RPATH_TOL_DISPLACEMENT 5000
+   RPATH_PRINT 2
+   SCF_GUESS read
+$end
+
+
+
+"""
+        self.assertEqual(content_7, job_7_expected_input_file)
+
+    def test_set_files(self):
+        """Test setting files"""
+        job_3_files_to_upload = [{'file_name': 'submit.sub',
+                                  'local': os.path.join(self.job_3.local_path, submit_filenames[servers[self.job_3.server]['cluster_soft']]),
+                                  'remote': os.path.join(self.job_3.remote_path, submit_filenames[servers[self.job_3.server]['cluster_soft']]),
+                                  'make_x': False,
+                                  'source': 'path'},
+                                 {'file_name': 'input.in',
+                                  'local': os.path.join(self.job_3.local_path, input_filenames[self.job_3.job_adapter]),
+                                  'remote': os.path.join(self.job_3.remote_path, input_filenames[self.job_3.job_adapter]),
+                                  'source': 'path',
+                                  'make_x': False}]
+        job_3_files_to_download = [{'file_name': 'output.out',
+                                    'local': os.path.join(self.job_3.local_path, output_filenames[self.job_3.job_adapter]),
+                                    'remote': os.path.join(self.job_3.remote_path, output_filenames[self.job_3.job_adapter]),
+                                    'source': 'path',
+                                    'make_x': False}]
+        self.assertEqual(self.job_3.files_to_upload, job_3_files_to_upload)
+        self.assertEqual(self.job_3.files_to_download, job_3_files_to_download)
+
+    def test_set_files_for_pipe(self):
+        """Test setting files for a pipe job"""
+        job_2_files_to_upload = [{'file_name': 'submit.sub',
+                                  'local': os.path.join(self.job_2.local_path, 'submit.sub'),
+                                  'remote': os.path.join(self.job_2.remote_path, 'submit.sub'),
+                                  'source': 'path',
+                                  'make_x': False},
+                                 {'file_name': 'data.hdf5',
+                                  'local': os.path.join(self.job_2.local_path, 'data.hdf5'),
+                                  'remote': os.path.join(self.job_2.remote_path, 'data.hdf5'),
+                                  'source': 'path',
+                                  'make_x': False}]
+        job_2_files_to_download = [{'file_name': 'data.hdf5',
+                                    'local': os.path.join(self.job_2.local_path, 'data.hdf5'),
+                                    'remote': os.path.join(self.job_2.remote_path, 'data.hdf5'),
+                                    'source': 'path',
+                                    'make_x': False}]
+        self.assertEqual(self.job_2.files_to_upload, job_2_files_to_upload)
+        self.assertEqual(self.job_2.files_to_download, job_2_files_to_download)
+
+    def test_QChemAdapter_def2tzvp(self):
+        """Test a QChem job using def2-tzvp"""
+        self.assertEqual(self.job_9.level.basis, 'def2-tzvp')
+
+    @classmethod
+    def tearDownClass(cls):
+        """
+        A function that is run ONCE after all unit tests in this class.
+        Delete all project directories created during these unit tests.
+        """
+        shutil.rmtree(os.path.join(ARC_PATH, 'arc', 'testing', 'test_QChemAdapter'), ignore_errors=True)
+
+
+if __name__ == '__main__':
+    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
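For context on the fuzzy basis-set lookup exercised by test_QChemAdapter_def2tzvp above: software_input_matching() in qchem.py uses rapidfuzz with a high score cutoff so that only near-exact variants pass. A minimal sketch, assuming a small in-memory list stands in for the basis_sets.csv rows (choices and match_basis are illustrative names):

from rapidfuzz import process, utils

choices = ['def2-TZVP', 'def2-SVP', '6-311+G(d,p)']  # assumed rows from basis_sets.csv

def match_basis(basis: str):
    """Exact hit first, then a high-cutoff fuzzy match against the known basis sets."""
    if basis in choices:
        return basis
    matches = process.extract(basis, choices, processor=utils.default_process, score_cutoff=99)
    # default_process lowercases and strips punctuation, so 'def2-tzvp' scores 100 against 'def2-TZVP'.
    return matches[0][0] if len(matches) == 1 else None

# match_basis('def2-tzvp') -> 'def2-TZVP'; an ambiguous or unknown name returns None here
# (the adapter raises a ValueError instead).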
diff --git a/arc/job/ssh.py b/arc/job/ssh.py
index 234f0807ab..0b54b27c49 100644
--- a/arc/job/ssh.py
+++ b/arc/job/ssh.py
@@ -39,7 +39,7 @@ def check_connections(function: Callable[..., Any]) -> Callable[..., Any]:
     def decorator(*args, **kwargs) -> Any:
         self = args[0]
         if self._ssh is None:  # not sure if some status may cause False
-            self._sftp, self._ssh = self.connect()
+            self._sftp, self._ssh = self._connect()
         # test connection, reference:
         # https://stackoverflow.com/questions/
         # 20147902/how-to-know-if-a-paramiko-ssh-channel-is-disconnected
@@ -71,7 +71,7 @@ class SSHClient(object):
     def __init__(self, server: str = '') -> None:
         if server == '':
             raise ValueError('A server name must be specified')
-        if server not in servers.keys():
+        if (server not in servers.keys() and server is not None):
             raise ValueError(f'Server name "{server}" is invalid. Currently defined servers are: {list(servers.keys())}')
         self.server = server
         self.address = servers[server]['address']
@@ -279,7 +279,7 @@ def check_running_jobs_ids(self) -> list:
         cluster_soft = servers[self.server]['cluster_soft'].lower()
         for i, status_line in enumerate(stdout):
             if i > i_dict[cluster_soft]:
-                job_id = status_line.split(split_by_dict[cluster_soft])[0]
+                job_id = status_line.lstrip().split(split_by_dict[cluster_soft])[0]
                 job_id = job_id.split('.')[0] if '.' in job_id else job_id
                 running_job_ids.append(job_id)
         return running_job_ids
@@ -308,6 +308,9 @@ def submit_job(self, remote_path: str,
             logger.warning(f'Got stderr when submitting job:\n{stderr}')
             job_status = 'errored'
             for line in stderr:
+                if 'Memory specification can not be satisfied' in line:
+                    logger.warning('User may be requesting more memory than is available. Please check server '
+                                   'settings, such as cpus and memory, in ARC/arc/settings/settings.py.')
                 if 'Requested node configuration is not available' in line:
                     logger.warning('User may be requesting more resources than are available. Please check server '
                                    'settings, such as cpus and memory, in ARC/arc/settings/settings.py')
@@ -317,13 +320,13 @@ def submit_job(self, remote_path: str,
                 self.submit_job(remote_path=remote_path, recursion=True)
             if recursion:
                 return None, None
-        elif cluster_soft.lower() in ['oge', 'sge'] and 'submitted' in stdout[0].lower():
+        elif cluster_soft.lower() in ['oge', 'sge'] and stdout and 'submitted' in stdout[0].lower():
             job_id = stdout[0].split()[2]
-        elif cluster_soft.lower() == 'slurm' and 'submitted' in stdout[0].lower():
+        elif cluster_soft.lower() == 'slurm' and stdout and 'submitted' in stdout[0].lower():
             job_id = stdout[0].split()[3]
         elif cluster_soft.lower() == 'pbs':
             job_id = stdout[0].split('.')[0]
-        elif cluster_soft.lower() == 'htcondor' and 'submitting' in stdout[0].lower():
+        elif cluster_soft.lower() == 'htcondor' and stdout and 'submitting' in stdout[0].lower():
             # Submitting job(s).
             # 1 job(s) submitted to cluster 443069.
             if len(stdout) and len(stdout[1].split()) and len(stdout[1].split()[-1].split('.')):
@@ -370,16 +373,16 @@ def _connect(self) -> Tuple[paramiko.sftp_client.SFTPClient, paramiko.SSHClient]:
         """
         ssh = paramiko.SSHClient()
         ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-        ssh.load_system_host_keys(filename=self.key)
+        ssh.load_system_host_keys()
         try:
             # If the server accepts the connection but the SSH daemon doesn't respond in
             # 15 seconds (default in paramiko) due to network congestion, faulty switches,
             # etc..., common solution is enlarging the timeout variable.
-            ssh.connect(hostname=self.address, username=self.un, banner_timeout=200)
+            ssh.connect(hostname=self.address, username=self.un, banner_timeout=200, key_filename=self.key)
         except:
             # This sometimes gives "SSHException: Error reading SSH protocol banner[Error 104] Connection reset by peer"
             # Try again:
-            ssh.connect(hostname=self.address, username=self.un, banner_timeout=200)
+            ssh.connect(hostname=self.address, username=self.un, banner_timeout=200, key_filename=self.key)
         sftp = ssh.open_sftp()
         return sftp, ssh
 
@@ -395,7 +398,7 @@ def close(self) -> None:
     @check_connections
     def get_last_modified_time(self,
                                remote_file_path_1: str,
-                               remote_file_path_2: Optional[str],
+                               remote_file_path_2: Optional[str] = None,
                                ) -> Optional[datetime.datetime]:
         """
         Returns the last modified time of ``remote_file_path_1`` if the file exists,
@@ -447,8 +450,8 @@ def list_available_nodes(self) -> list:
         Returns:
             list: lines of the node hostnames.
         """
-        cluster_soft = servers[self.server]['cluster_soft'].lower()
-        if cluster_soft == 'htcondor':
+        cluster_soft = servers[self.server]['cluster_soft']
+        if cluster_soft.lower() == 'htcondor':
             return list()
         cmd = list_available_nodes_command[cluster_soft]
         stdout = self._send_command_to_server(command=cmd)[0]
@@ -489,6 +492,18 @@ def change_mode(self,
         command = f'chmod{recursive} {mode} {file_name}'
         self._send_command_to_server(command, remote_path)
 
+    def remove_dir(self, remote_path: str) -> None:
+        """
+        Remove a directory on the server.
+
+        Args:
+            remote_path (str): The path to the directory to be removed on the remote server.
+        """
+        command = f'rm -r "{remote_path}"'
+        _, stderr = self._send_command_to_server(command)
+        if stderr:
+            raise ServerError(f'Cannot remove dir for the given path ({remote_path}).\nGot: {stderr}')
+
     def _check_file_exists(self,
                            remote_file_path: str,
                            ) -> bool:
@@ -556,32 +571,53 @@ def check_job_status_in_stdout(job_id: int,
         stdout = stdout.splitlines()
     for status_line in stdout:
         if str(job_id) in status_line:
-            break
-    else:
-        return 'done'
-    if servers[server]['cluster_soft'].lower() == 'slurm':
-        status = status_line.split()[4]
-        if status.lower() in ['r', 'qw', 't', 'cg', 'pd']:
-            return 'running'
-        elif status.lower() in ['bf', 'ca', 'f', 'nf', 'st', 'oom']:
-            return 'errored'
-    elif servers[server]['cluster_soft'].lower() == 'pbs':
-        status = status_line.split()[-2]
-        if status.lower() in ['r', 'q', 'c', 'e', 'w']:
-            return 'running'
-        elif status.lower() in ['h', 's']:
-            return 'errored'
-    elif servers[server]['cluster_soft'].lower() in ['oge', 'sge']:
-        status = status_line.split()[4]
-        if status.lower() in ['r', 'qw', 't']:
-            return 'running'
-        elif status.lower() in ['e']:
-            return 'errored'
-    elif servers[server]['cluster_soft'].lower() == 'htcondor':
-        return 'running'
-    else:
-        raise ValueError(f'Unknown cluster software {servers[server]["cluster_soft"]}')
+            if servers[server]['cluster_soft'].lower() == 'slurm':
+                status = status_line.split()[4]
+                status_time = status_line.split()[5]
+                # The time can be in any of the following formats:
+                #     1-00:00:00
+                #     00:00:00
+                #     00:00
+                # We need to handle all of these cases.
+                days = status_time.split('-')[0] if len(status_time.split('-')) == 2 else 0
+                # Remove the days from the status_time.
+                status_time = status_time.split('-')[1] if len(status_time.split('-')) == 2 else status_time
+                if len(status_time.split(':')) == 3:
+                    hours, minutes, seconds = map(int, status_time.split(':'))
+                else:
+                    minutes, seconds = map(int, status_time.split(':'))
+                node_id = status_line.split()[7]
+                # Sometimes the node stops responding during configuration.
+                # Usually a node takes approx 10 mins to configure; we shall wait for 15 mins.
+                if status.lower() == 'cf' and minutes >= 15:
+                    # Run a command to check if the node is still responding.
+                    with SSHClient(server) as ssh:
+                        stdout, _ = ssh._send_command_to_server(f'scontrol show node {node_id}', remote_path='')
+                        if 'NOT_RESPONDING' in stdout:
+                            return 'errored'
+                if status.lower() in ['r', 'qw', 't', 'cg', 'pd', 'cf']:
+                    return 'running'
+                elif status.lower() in ['bf', 'ca', 'f', 'nf', 'st', 'oom']:
+                    return 'errored'
+            elif servers[server]['cluster_soft'].lower() == 'pbs':
+                status = status_line.split()[-2]
+                if status.lower() in ['r', 'q', 'c', 'e', 'w']:
+                    return 'running'
+                elif status.lower() in ['h', 's']:
+                    return 'errored'
+            elif servers[server]['cluster_soft'].lower() in ['oge', 'sge']:
+                status = status_line.split()[4]
+                if status.lower() in ['r', 'qw', 't']:
+                    return 'running'
+                elif status.lower() in ['e']:
+                    return 'errored'
+            elif servers[server]['cluster_soft'].lower() == 'htcondor':
+                return 'running'
+            else:
+                raise ValueError(f'Unknown cluster software {servers[server]["cluster_soft"]}')
+
+    return 'done'
 
 def delete_all_arc_jobs(server_list: list,
                         jobs: Optional[List[str]] = None,
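The Slurm branch above now parses the job's elapsed time, which Slurm prints as D-HH:MM:SS, HH:MM:SS, or MM:SS. A standalone sketch of just that parsing (the function name parse_slurm_elapsed is illustrative):

def parse_slurm_elapsed(status_time: str):
    """Split a Slurm elapsed-time field into (days, hours, minutes, seconds)."""
    days = 0
    if '-' in status_time:
        days_str, status_time = status_time.split('-')
        days = int(days_str)
    parts = list(map(int, status_time.split(':')))
    if len(parts) == 3:
        hours, minutes, seconds = parts
    else:
        # MM:SS form, common for freshly started jobs.
        hours = 0
        minutes, seconds = parts
    return days, hours, minutes, seconds

# parse_slurm_elapsed('1-02:03:04') -> (1, 2, 3, 4); parse_slurm_elapsed('12:34') -> (0, 0, 12, 34)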
+ break + elif 'OPTIMIZATION CONVERGED' in line and done: # `done` should already be assigned + done = True + break if done: return 'done', keywords, '', '' error = error if error else 'QChem job terminated for an unknown reason.' @@ -363,26 +378,48 @@ def determine_ess_status(output_path: str, elif 'A further' in line and 'Mwords of memory are needed' in line and 'Increase memory to' in line: # e.g.: `A further 246.03 Mwords of memory are needed for the triples to run. # Increase memory to 996.31 Mwords.` (w/o the line break) - keywords = ['Memory'] + pattern = r"(\d+(?:\.\d+)?)\s*Mwords(?![\s\S]*Mwords)" + matches = re.findall(pattern, line) + memory_increase = float(matches[-1]) + error = f"Memory required: {memory_increase} MW" + keywords=['Memory'] for line0 in reverse_lines: if ' For full I/O' in line0 and 'increase memory by' in line0 and 'Mwords to' in line0: - memory_increase = re.findall(r"[\d.]+", line0)[0] - error = f"Additional memory required: {memory_increase} MW" + pattern = r"(\d+(?:\.\d+)?)\s*Mwords(?![\s\S]*Mwords)" + matches = re.findall(pattern, line0) + memory_increase = float(matches[-1]) + error = f"Memory required: {memory_increase} MW" break - error = f'Additional memory required: {line.split()[2]} MW' if 'error' not in locals() else error + elif 'For minimal' in line0 and 'in triples' in line0 and 'increase memory by' in line0: + pattern = r"(\d+(?:\.\d+)?)\s*Mwords(?![\s\S]*Mwords)" + matches = re.findall(pattern, line0) + memory_increase = float(matches[-1]) + error = f"Memory required: {memory_increase} MW" + break break - elif 'insufficient memory available - require' in line: + elif 'insufficient memory available - require' in line: # e.g.: `insufficient memory available - require 228765625 have # 62928590 # the request was for real words` keywords = ['Memory'] - error = f'Additional memory required: {float(line.split()[-2]) / 1e6} MW' + numbers = re.findall(r'\d+', line) + total = sum(int(number) for number in numbers) + # It appears that the number is in words. Need to convert to MW. + total /= 1e6 + error = f'Memory required: {total} MW' break - elif 'Insufficient memory to allocate' in line or 'The problem occurs in memory' in line: + elif 'Insufficient memory to allocate' in line: # e.g.: `Insufficient memory to allocate a new array of length 321843600 8-byte words # The problem occurs in memory` keywords = ['Memory'] - error = 'Additional memory required' + numbers = re.findall(r'\d+', line) + size_of_each_element_bytes = 8 + # Calculating total memory + # Just making sure that it is 8 bytes per element + assert int(numbers[1]) == size_of_each_element_bytes + # Assuming this is in words, need to convert to MW. 
+ total = int(numbers[0]) / 1e6 + error = f'Memory required: {total} MW' break elif 'Basis library exhausted' in line: # e.g.: @@ -451,10 +488,27 @@ def determine_job_log_memory_issues(job_log: Optional[str] = None) -> Tuple[List """ keywords, error, line = list(), '', '' if job_log is not None: - if os.path.isfile(job_log): - with open(job_log, 'r') as f: - lines = f.readlines() - else: + try: + if os.path.isfile(job_log): + with open(job_log, 'r') as f: + lines = f.readlines() + if os.path.isfile(job_log): + with open(job_log, 'rb') as f: + # Read the file + first_bytes = f.read() + # Check if the bytes contain a null byte + has_null_byte = b'\x00' in first_bytes + # Use the appropriate mode based on whether the file is binary or not + mode = 'rb' if has_null_byte else 'r' + # Read the file contents using the determined mode + lines = first_bytes.decode('utf-8') + if mode == 'r': + with open(job_log, 'r') as f: + lines = f.readlines() + else: + lines = job_log.splitlines() + except ValueError: + job_log.replace('\x00','') lines = job_log.splitlines() mem_usage = '' for line in lines: @@ -920,20 +974,75 @@ def trsh_ess_job(label: str, elif software == 'qchem': if 'MaxOptCycles' in job_status['keywords'] and 'max_cycles' not in ess_trsh_methods: # this is a common error, increase max cycles and continue running from last geometry - logger.info(f'Troubleshooting {job_type} job in {software} for {label} using max_cycles') + log_message = f'Troubleshooting {job_type} job in {software} for {label} using max cycles' ess_trsh_methods.append('max_cycles') trsh_keyword = '\n GEOM_OPT_MAX_CYCLES 250' # default is 50 + if 'DIIS_GDM' in ess_trsh_methods: + log_message += ' and DIIS_GDM and max SCF cycles' + trsh_keyword += '\n SCF_ALGORITHM DIIS_GDM\n MAX_SCF_CYCLES 1000' + if 'SYM_IGNORE' in ess_trsh_methods: + log_message += ' and SYM_IGNORE' + trsh_keyword += '\n SYM_IGNORE True' + logger.info(log_message) elif 'SCF' in job_status['keywords'] and 'DIIS_GDM' not in ess_trsh_methods: # change the SCF algorithm and increase max SCF cycles - logger.info(f'Troubleshooting {job_type} job in {software} for {label} using the DIIS_GDM SCF algorithm') + log_message = f'Troubleshooting {job_type} job in {software} for {label} using DIIS_GDM and max SCF cycles' ess_trsh_methods.append('DIIS_GDM') trsh_keyword = '\n SCF_ALGORITHM DIIS_GDM\n MAX_SCF_CYCLES 1000' # default is 50 + if 'SYM_IGNORE' in ess_trsh_methods: + log_message += ' and SYM_IGNORE' + trsh_keyword += '\n SYM_IGNORE True' + if 'max_cycles' in ess_trsh_methods: + log_message += ' and max_cycles' + trsh_keyword += '\n GEOM_OPT_MAX_CYCLES 250' + logger.info(log_message) + elif 'MaxIter' in job_status['keywords'] and 'maxiter' not in ess_trsh_methods: + log_message = f'Troubleshooting {job_type} job in {software} for {label} using maxiter' + ess_trsh_methods.append('maxiter') + trsh_keyword = '\n MAX_SCF_CYCLES 1000' + if 'max_cycles' in ess_trsh_methods: + log_message += ' and max_cycles' + trsh_keyword += '\n GEOM_OPT_MAX_CYCLES 250' + if 'DIIS_GDM' in ess_trsh_methods: + log_message += ' and DIIS_GDM' + trsh_keyword += '\n SCF_ALGORITHM DIIS_GDM' + if 'SYM_IGNORE' in ess_trsh_methods: + log_message += ' and SYM_IGNORE' + trsh_keyword += '\n SYM_IGNORE True' + logger.info(log_message) + elif 'Minimization' in job_status['keywords']: + # Uncertain what this error is, but assuming it's just an error that means we need to re-run the job under the same conditions + # However, if this error persists, we will determine that the job is not converging 
and so we will + # determine it cannot be run and will not try again + if 'Minimization' in job_status['error']: + logger.warning(f'Could not troubleshoot {job_type} job in {software} for {label} with same conditions - Minimization error persists') + couldnt_trsh = True + else: + log_message = f'Troubleshooting {job_type} job in {software} for {label} with same conditions' + if 'maxiter' in ess_trsh_methods: + log_message += ' and maxiter' + trsh_keyword = '\n MAX_SCF_CYCLES 1000' + if 'max_cycles' in ess_trsh_methods: + log_message += ' and max_cycles' + trsh_keyword = '\n GEOM_OPT_MAX_CYCLES 250' + if 'DIIS_GDM' in ess_trsh_methods: + log_message += ' and DIIS_GDM' + trsh_keyword = '\n SCF_ALGORITHM DIIS_GDM' + if 'SYM_IGNORE' in ess_trsh_methods: + log_message += ' and SYM_IGNORE' + trsh_keyword = '\n SYM_IGNORE True' elif 'SYM_IGNORE' not in ess_trsh_methods: # symmetry - look in manual, no symm if fails # change the SCF algorithm and increase max SCF cycles - logger.info(f'Troubleshooting {job_type} job in {software} for {label} using SYM_IGNORE as well as the ' - f'DIIS_GDM SCF algorithm') + log_message = f'Troubleshooting {job_type} job in {software} for {label} using SYM_IGNORE' ess_trsh_methods.append('SYM_IGNORE') - trsh_keyword = '\n SCF_ALGORITHM DIIS_GDM\n MAX_SCF_CYCLES 250\n SYM_IGNORE True' + trsh_keyword = '\n SYM_IGNORE True' + if 'max_cycles' in ess_trsh_methods: + log_message += ' and max_cycles' + trsh_keyword += '\n GEOM_OPT_MAX_CYCLES 250' + if 'DIIS_GDM' in ess_trsh_methods: + log_message += ' and DIIS_GDM and increased max SCF cycles' + trsh_keyword += '\n SCF_ALGORITHM DIIS_GDM\n MAX_SCF_CYCLES 1000' + logger.info(log_message) else: couldnt_trsh = True @@ -999,19 +1108,39 @@ def trsh_ess_job(label: str, if 'Memory' in job_status['keywords']: # Increase memory allocation. # molpro gives something like `'errored: additional memory (mW) required: 996.31'`. - # job_status standardizes the format to be: `'Additional memory required: {0} MW'` - # The number is the ADDITIONAL memory required in GB + # job_status standardizes the format to be: `'Memory required: {0} MW'` + # The number is the complete memory required in GB ess_trsh_methods.append('memory') - add_mem_str = job_status['error'].split()[-2] # parse Molpro's requirement in MW + add_mem_str = re.findall(r'\d+(?:\.\d+)?', job_status['error']) + add_mem_str = add_mem_str[0] # parse Molpro's requirement in MW if all(c.isdigit() or c == '.' for c in add_mem_str): add_mem = float(add_mem_str) add_mem = int(np.ceil(add_mem / 100.0)) * 100 # round up to the next hundred - memory = memory_gb + add_mem / 128. + 5 # convert MW to GB, add 5 extra GB (be conservative) + # Reasons for this error: + # 1. The MWords per process is not enough, even though we provided an adequate amount of memory to the submit script. + # 2. The MWords per process is ENOUGH, but the total memory is too high, therefore, we need to reduce the number of cores. 
+ # Convert submit memory to MWords + ## 1 MWord = 7.45e-3 GB + ## 1 GB = 134.2 MWords + sumbit_mem_mwords = int(np.ceil(memory_gb / 7.45e-3)) + ## But, Molpro is actually requesting more memory per core, therefore + sumbit_mem_mwords_per_cpu = int(np.ceil(sumbit_mem_mwords / cpu_cores)) + + ## Check if submit memory is enough + if sumbit_mem_mwords_per_cpu < add_mem: + # Convert back to GB and also multiply by the number of cores + memory = add_mem * 7.45e-3 * cpu_cores + ## However, this might be too much memory for the server, therefore, we need to reduce the number of cores + ess_trsh_methods.append('cpu') + ess_trsh_methods.append(f'molpro_memory:{add_mem}') + else: + ## The real issue occurs here, where the total memory is too high but + memory = memory_gb # Don't change the submit memory else: # The required memory is not specified memory = memory_gb * 3 # convert MW to GB, add 5 extra GB (be conservative) - logger.info(f'Troubleshooting {job_type} job in {software} for {label} using memory: {memory:.2f} GB ' - f'instead of {memory_gb} GB') + logger.info(f'Troubleshooting {job_type} job in {software} for {label} using memory: {memory:.2f} GB ' + f'instead of {memory_gb} GB') elif 'shift' not in ess_trsh_methods: # Try adding a level shift for alpha- and beta-spin orbitals # Applying large negative level shifts like {rhf; shift,-1.0,-0.5} @@ -1303,8 +1432,8 @@ def trsh_job_on_server(server: str, # find available node logger.error('Troubleshooting by changing node.') - ssh = SSHClient(server) - nodes = ssh.list_available_nodes() + with SSHClient(server) as ssh: + nodes = ssh.list_available_nodes() for node in nodes: if node not in server_nodes: server_nodes.append(node) @@ -1327,7 +1456,7 @@ def trsh_job_on_server(server: str, insert_line_num = 5 else: # Other software? 
-            logger.denug(f'Unknown cluster software {cluster_soft} is encountered when '
+            logger.error(f'Unknown cluster software {cluster_soft} is encountered when '
                          f'troubleshooting by changing node.')
             return None, False
     for i, line in enumerate(content):
diff --git a/arc/job/trsh_test.py b/arc/job/trsh_test.py
index f62c7fad74..1b31a33c65 100644
--- a/arc/job/trsh_test.py
+++ b/arc/job/trsh_test.py
@@ -447,7 +447,19 @@ def test_trsh_ess_job(self):
                                                                    job_type, software, fine, memory_gb,
                                                                    num_heavy_atoms, cpu_cores, ess_trsh_methods)
         self.assertIn('memory', ess_trsh_methods)
-        self.assertAlmostEqual(memory, 222.15625)
+        # The error reports 'require: 23661817216', given in Words
+        # Convert to MW: 23661817216 / 1e6 = 23661.817216
+        ## Round up to the nearest 100: 23700
+        ## 1 MWord = 7.45e-3 GB
+        ## 1 GB = 134.2 MWords
+        # Get the server memory in MW: 32 * 134.2 ~ 4296 MW
+        # Now get the memory per cpu core: 4296 / 8 = 537 MW <- this is the current MAX MW we can give Molpro based on the CPUs we are providing it
+        # Check if submit_mem_mwords_per_cpu < add_mem: 537 < 23700, so the memory is recomputed
+        # memory = add_mem * 7.45e-3 * cpu_cores
+        # memory = 23700 * 7.45e-3 * 8 = 1412.52 GB
+        # The memory is WAY too high, so we also need to check that 'cpu' is in ess_trsh_methods
+        self.assertIn('cpu', ess_trsh_methods)
+        self.assertAlmostEqual(memory, 1412.52)
         path = os.path.join(self.base_path['molpro'], 'insufficient_memory_2.out')
         status, keywords, error, line = trsh.determine_ess_status(output_path=path,
                                                                   species_label='TS',
                                                                   job_type='sp')
@@ -456,9 +468,12 @@ def test_trsh_ess_job(self):
         memory, shift, cpu_cores, couldnt_trsh = trsh.trsh_ess_job(label, level_of_theory, server, job_status,
                                                                    job_type, software, fine, memory_gb,
                                                                    num_heavy_atoms, cpu_cores, ess_trsh_methods)
-        self.assertIn('memory', ess_trsh_methods)
-        self.assertEqual(memory, 96.0)
+        # Molpro: Insufficient Memory 2 Test
+        # Need to check with Alon
+        self.assertIn('memory', ess_trsh_methods)
+        self.assertEqual(memory, 32.0)
 
+        # Molpro: Insufficient Memory 3 Test
         path = os.path.join(self.base_path['molpro'], 'insufficient_memory_3.out')
         status, keywords, error, line = trsh.determine_ess_status(output_path=path,
@@ -470,7 +485,8 @@ def test_trsh_ess_job(self):
                                                                    job_type, software, fine, memory_gb,
                                                                    num_heavy_atoms, cpu_cores, ess_trsh_methods)
         self.assertIn('memory', ess_trsh_methods)
-        self.assertEqual(memory, 62.0)
+        self.assertIn('cpu', ess_trsh_methods)
+        self.assertEqual(memory, 250.32)  # in GB - the molpro adapter will handle this large memory
 
         # Test Orca
         # Orca: test 1
diff --git a/arc/level.py b/arc/level.py
index 2b5dc7c525..689ed1bdf8 100644
--- a/arc/level.py
+++ b/arc/level.py
@@ -15,7 +15,6 @@
 from arc.common import ARC_PATH, get_logger, get_ordered_intersection_of_two_lists, read_yaml_file
 from arc.imports import settings
 
-
 logger = get_logger()
@@ -499,13 +498,24 @@ def deduce_software(self,
             self.software = 'orca'
 
         # Gaussian
-        if self.method_type == 'composite' or job_type == 'composite' or job_type == 'irc' \
+        if self.method_type == 'composite' or job_type == 'composite' \
                 or any([sum(['iop' in value.lower() for value in subdict.values()]) for subdict in self.args.values()]):
             if 'gaussian' not in supported_ess:
                 raise ValueError(f'Could not find Gaussian to run the {self.method}.\n'
                                  f'levels_ess is:\n{levels_ess}')
             self.software = 'gaussian'
+
+        # QChem & Gaussian for IRC jobs
+        if job_type == 'irc':
+            if 'qchem' in supported_ess:
+                self.software = 'qchem'
+            elif 'gaussian' in supported_ess:
+                self.software = 'gaussian'
+            else:
+                raise ValueError(f'Could not find either QChem or Gaussian to run an IRC job.\n'
+                                 f'levels_ess is:\n{levels_ess}')
+
         # TorchANI
         if 'torchani' in self.method:
             self.software = 'torchani'
@@ -553,6 +563,7 @@ def determine_compatible_ess(self):
                 self.compatible_ess.append(ess)
 
 
+
 def get_params_from_arkane_level_of_theory_as_str(arkane_level: str) -> Dict[str, str]:
     """
     Get the method, basis set, and software (if any) of an str representation of an Arkane LevelOfTheory object instance.
diff --git a/arc/level_test.py b/arc/level_test.py
index a1d9b68b0c..4f6fd0d03d 100644
--- a/arc/level_test.py
+++ b/arc/level_test.py
@@ -45,7 +45,7 @@ def test_deduce_software(self):
         self.assertEqual(Level(method='B3LYP', basis='6-311g+(d,f)').software, 'gaussian')
         level_3 = Level(method='B3LYP', basis='6-311g+(d,f)')
         level_3.deduce_software(job_type='irc')
-        self.assertEqual(level_3.software, 'gaussian')
+        self.assertEqual(level_3.software, 'qchem')
         self.assertEqual(Level(method='DLPNO-CCSD(T)', basis='def2-tzvp').software, 'orca')
         self.assertEqual(Level(method='PM6').software, 'gaussian')
         self.assertEqual(Level(method='HF').software, 'gaussian')
@@ -209,7 +209,7 @@ def test_determine_compatible_ess(self):
         self.assertIsNone(level_2.compatible_ess)
         level_2.determine_compatible_ess()
         self.assertEqual(sorted(level_2.compatible_ess), sorted(['gaussian', 'qchem', 'terachem']))
-        
+
 
 if __name__ == '__main__':
     unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
diff --git a/arc/main_test.py b/arc/main_test.py
index b0bd5926a6..5b0f688362 100644
--- a/arc/main_test.py
+++ b/arc/main_test.py
@@ -99,11 +99,11 @@ def test_as_dict(self):
                     'ess_settings': {'cfour': ['local'],
                                      'gaussian': ['local', 'server2'],
                                      'gcn': ['local'],
-                                     'molpro': ['local', 'server2'],
+                                     'molpro': ['local', 'server2', 'azure'],
                                      'onedmin': ['server1'],
                                      'openbabel': ['local'],
-                                     'orca': ['local'],
-                                     'qchem': ['server1'],
+                                     'orca': ['local', 'azure'],
+                                     'qchem': ['local', 'azure'],
                                      'terachem': ['server1'],
                                      'torchani': ['local'],
                                      'xtb': ['local'],
diff --git a/arc/parser.py b/arc/parser.py
index d5a2a73b62..fc005af31c 100644
--- a/arc/parser.py
+++ b/arc/parser.py
@@ -149,7 +149,7 @@ def parse_frequencies(path: str,
 
 def parse_normal_mode_displacement(path: str,
                                    software: Optional[str] = None,
-                                   raise_error: bool = True,
+                                   raise_error: bool = False,  # TODO: Why was this True by default? What is it supposed to do?
                                    ) -> Tuple[np.ndarray, np.ndarray]:
     """
     Parse frequencies and normal mode displacement.
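The new Q-Chem branch in the following hunk splits each displacement row into one (x, y, z) triplet per normal mode, three modes per line. As a minimal, self-contained sketch of that slicing, using a made-up sample row (not taken from a real output file):

    line = 'C      -0.000 -0.000 -0.000   -0.000 -0.000 -0.072   -0.000  0.136 -0.000'
    splits = line.split()[1:]  # drop the element symbol, keep the 9 coordinate entries
    modes = [splits[3 * i: 3 * i + 3] for i in range(3)]  # one x/y/z triplet per mode
    # modes == [['-0.000', '-0.000', '-0.000'], ['-0.000', '-0.000', '-0.072'], ['-0.000', '0.136', '-0.000']]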
@@ -210,6 +210,36 @@ def parse_normal_mode_displacement(path: str,
                 parse_normal_mode_disp = True
             elif parse and not line or '-------------------' in line:
                 parse = False
+    elif software == 'qchem':
+        number_freqs_per_line = 3
+        parse, parse_normal_mode_disp = False, False
+        for line in lines + ['']:
+            if 'VIBRATIONAL FREQUENCIES (CM**-1) AND NORMAL MODES' in line:
+                parse = True
+            if (parse and len(line.split()) in [0, 1, 3]) or (parse and 'TransDip' in line):
+                parse_normal_mode_disp = False
+                normal_mode_disp.extend(normal_mode_disp_entries)
+                normal_mode_disp_entries = list()
+            if parse and 'Frequency:' in line:
+                splits = line.split()
+                freqs.extend(float(freq) for freq in splits[1:])
+                number_freqs_per_line = len(splits) - 1
+                normal_mode_disp_entries = list()
+            elif parse_normal_mode_disp:
+                # parsing, e.g.:
+                #    X Y Z   X Y Z   X Y Z
+                # C  -0.000 -0.000 -0.000   -0.000 -0.000 -0.072   -0.000  0.136 -0.000
+                # C   0.000 -0.000  0.000    0.000 -0.000  0.036    0.000  0.131  0.000
+                # TODO: TransDip -0.000 -0.000 -0.000   -0.000 0.000 -0.029   0.000 -0.016 -0.000
+                splits = line.split()[1:]
+                for i in range(number_freqs_per_line):
+                    if len(normal_mode_disp_entries) < i + 1:
+                        normal_mode_disp_entries.append(list())
+                    normal_mode_disp_entries[i].append(splits[3 * i: 3 * i + 3])
+            elif parse and 'X Y Z' in line:
+                parse_normal_mode_disp = True
+            elif parse and not line or 'TransDip' in line:
+                parse = False
     elif raise_error:
         raise NotImplementedError(f'parse_normal_mode_displacement() is currently not implemented for {software}.')
     freqs = np.array(freqs, np.float64)
@@ -470,30 +500,68 @@ def parse_1d_scan_coords(path: str) -> List[Dict[str, tuple]]:
     lines = _get_lines_from_file(path)
     log = ess_factory(fullpath=path, check_for_errors=False)
-    if not isinstance(log, GaussianLog):
-        raise NotImplementedError(f'Currently parse_1d_scan_coords only supports Gaussian files, got {type(log)}')
-    done = False
-    i = 0
-    while not done:
-        if i >= len(lines) or 'Normal termination of Gaussian' in lines[i] or 'Error termination via' in lines[i]:
-            done = True
-        elif 'Optimization completed' in lines[i]:
-            while i < len(lines) + 10 and 'Input orientation:' not in lines[i] or 'Forces (Hartrees/Bohr)' in lines [i + 7]:
-                i += 1
-            if 'Error termination via' in lines[i]:
-                return traj
-            i += 5
-            xyz_str, skip_traj = '', False
-            while len(lines) and '--------------------------------------------' not in lines[i]:
-                if 'DIIS: error' in lines[i]:
-                    skip_traj = True
-                    break
-                splits = lines[i].split()
-                xyz_str += f'{qcel.periodictable.to_E(int(splits[1]))} {splits[3]} {splits[4]} {splits[5]}\n'
+
+    if isinstance(log, GaussianLog):
+        done = False
+        i = 0
+        while not done:
+            if i >= len(lines) or 'Normal termination of Gaussian' in lines[i] or 'Error termination via' in lines[i]:
+                done = True
+            elif 'Optimization completed' in lines[i]:
+                while i < len(lines) + 10 and 'Input orientation:' not in lines[i] or 'Forces (Hartrees/Bohr)' in lines[i + 7]:
+                    i += 1
+                if 'Error termination via' in lines[i]:
+                    return traj
+                i += 5
+                xyz_str, skip_traj = '', False
+                while len(lines) and '--------------------------------------------' not in lines[i]:
+                    if 'DIIS: error' in lines[i]:
+                        skip_traj = True
+                        break
+                    splits = lines[i].split()
+                    xyz_str += f'{qcel.periodictable.to_E(int(splits[1]))} {splits[3]} {splits[4]} {splits[5]}\n'
+                    i += 1
+                if not skip_traj:
+                    traj.append(str_to_xyz(xyz_str))
+            i += 1
+    elif isinstance(log, QChemLog):
+        done = False
+        i = 0
+        # In our QChem scans, we usually run two jobs in one input file. Since there are two jobs, the output file
+        # will have two "Thank you very much for using Q-Chem" lines. Therefore, in order to stop the parsing from
+        # ending prematurely, we count the number of "Thank you very much for using Q-Chem" lines.
+        qchem_term_count = 0
+        qchem_term_line = lines.copy()
+        for qlines in qchem_term_line:
+            if 'Thank you very much for using Q-Chem' in qlines:
+                qchem_term_count += 1
+        while not done:
+            if i >= len(lines):
+                done = True
+            elif 'Thank you very much for using Q-Chem' in lines[i]:
+                # Once we reach a "Thank you very much for using Q-Chem" line, we decrement the count by 1.
+                # If the count is not 0, we continue parsing; if the count is 0, we are done parsing.
+                qchem_term_count -= 1
+                if qchem_term_count == 0:
+                    done = True
+                i += 1
+            elif 'OPTIMIZATION CONVERGED' in lines[i] and 'Coordinates (Angstroms)' in lines[i + 3]:
+                i += 5
+                xyz_str, skip_traj = '', False
+
+                while len(lines) and lines[i] != '\n' and 'Z-matrix Print:\n' not in lines[i + 1]:
+                    splits = lines[i].split()
+                    xyz_str += f'{splits[1]} {splits[2]} {splits[3]} {splits[4]}\n'
+                    i += 1
+
+                if not skip_traj:
+                    traj.append(str_to_xyz(xyz_str))
+            else:
+                i += 1
+    else:
+        raise NotImplementedError(f'Currently parse_1d_scan_coords only supports Gaussian and QChem files, got {type(log)}')
+
     return traj
@@ -784,23 +852,60 @@ def parse_trajectory(path: str) -> Optional[List[Dict[str, tuple]]]:
             ess_file = False
     if ess_file:
-        if not isinstance(log, GaussianLog):
-            raise NotImplementedError(f'Currently parse_trajectory only supports Gaussian files, got {type(log)}')
-        traj = list()
-        done = False
-        i = 0
-        while not done:
-            if i >= len(lines) or 'Normal termination of Gaussian' in lines[i] or 'Error termination via' in lines[i]:
-                done = True
-            elif 'Input orientation:' in lines[i]:
-                i += 5
-                xyz_str = ''
-                while len(lines) and '--------------------------------------------' not in lines[i]:
-                    splits = lines[i].split()
-                    xyz_str += f'{qcel.periodictable.to_E(int(splits[1]))} {splits[3]} {splits[4]} {splits[5]}\n'
+        if isinstance(log, GaussianLog):
+            traj = list()
+            done = False
+            i = 0
+            while not done:
+                if i >= len(lines) or 'Normal termination of Gaussian' in lines[i] or 'Error termination via' in lines[i]:
+                    done = True
+                elif 'Input orientation:' in lines[i]:
+                    i += 5
+                    xyz_str = ''
+                    while len(lines) and '--------------------------------------------' not in lines[i]:
+                        splits = lines[i].split()
+                        xyz_str += f'{qcel.periodictable.to_E(int(splits[1]))} {splits[3]} {splits[4]} {splits[5]}\n'
+                        i += 1
+                    traj.append(str_to_xyz(xyz_str))
+                i += 1
+        elif isinstance(log, QChemLog):
+            traj = list()
+            done = False
+            i = 0
+            # In our QChem scans, we usually run two jobs in one input file. Since there are two jobs, the output file
+            # will have two "Thank you very much for using Q-Chem" lines. Therefore, in order to stop the parsing from
+            # ending prematurely, we count the number of "Thank you very much for using Q-Chem" lines.
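+            # As a standalone sketch of this counting strategy (illustration only, not executed here):
+            #     remaining = sum('Thank you very much for using Q-Chem' in line for line in lines)
+            #     for i, line in enumerate(lines):
+            #         if 'Thank you very much for using Q-Chem' in line:
+            #             remaining -= 1
+            #             if remaining == 0:
+            #                 break  # `i` now indexes the final terminator; parsing may stop here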
+            qchem_term_count = 0
+            qchem_term_line = lines.copy()
+            for qlines in qchem_term_line:
+                if 'Thank you very much for using Q-Chem' in qlines:
+                    qchem_term_count += 1
+            while not done:
+                if i >= len(lines):
+                    done = True
+                elif 'Thank you very much for using Q-Chem' in lines[i]:
+                    # Once we reach a "Thank you very much for using Q-Chem" line, we decrement the count by 1.
+                    # If the count is not 0, we continue parsing; if the count is 0, we are done parsing.
+                    qchem_term_count -= 1
+                    if qchem_term_count == 0:
+                        done = True
+                    i += 1
+                elif 'Standard Nuclear Orientation (Angstroms)' in lines[i]:
+                    i += 3  # Skip the headers
+                    xyz_str = ''
+                    # Keep reading until we encounter a line of hyphens
+                    while not lines[i].startswith(' ---'):
+                        splits = lines[i].split()
+                        # The element symbol, X, Y, and Z are in indices 1, 2, 3, and 4, respectively
+                        xyz_str += f'{splits[1]} {splits[2]} {splits[3]} {splits[4]}\n'
+                        i += 1
+                    traj.append(str_to_xyz(xyz_str))
+                else:
                     i += 1
-                traj.append(str_to_xyz(xyz_str))
-                i += 1
+
+        else:
+            raise NotImplementedError(f'Currently parse_trajectory only supports Gaussian and QChem files, got {type(log)}')
     else:
         # this is not an ESS output file, probably an XYZ format file with several Cartesian coordinates
@@ -1069,8 +1174,8 @@ def parse_scan_args(file_path: str) -> dict:
     Returns: dict
         A dictionary that contains the scan arguments as well as step number, step size, number of atom::
 
-              {'scan': , 
-               'freeze': , 
+              {'scan': ,
+               'freeze': ,
                'step': ,
                'step_size': ,
                'n_atom': ,
@@ -1106,8 +1211,22 @@ def parse_scan_args(file_path: str) -> dict:
                     scan_args['freeze'].append([int(values[i]) for i in range(len(values))])
                 if 'NAtoms' in line:
                     scan_args['n_atom'] = int(line.split()[1])
+    elif isinstance(log, QChemLog):
+        freeze = parse_str_blocks(file_path,
+                                  'FIXED',
+                                  'ENDFIXED', regex=False)
+        atoms = len(parse_str_blocks(file_path,
+                                     '$molecule',
+                                     '$end', regex=False)[0]) - 3
+
+        scan_blk = parse_str_blocks(file_path, '$scan', '$end', regex=False)[0][1:-1]
+        scan_args['scan'] = list(map(int, scan_blk[0][:-2].split(sep=' ')[1:-3]))
+        scan_args['freeze'] = freeze if len(freeze) > 0 else []  # TODO: find an example with freeze
+        scan_args['step'] = 360 // int(float(scan_blk[0].split(' ')[-1].split(sep='\n')[0]))
+        scan_args['step_size'] = float(scan_blk[0].split(' ')[-1].split(sep='\n')[0])
+        scan_args['n_atom'] = atoms
     else:
-        raise NotImplementedError(f'parse_scan_args() can currently only parse Gaussian output '
+        raise NotImplementedError(f'parse_scan_args() can currently only parse Gaussian and QChem output '
                                   f'files, got {log}')
     return scan_args
@@ -1169,6 +1288,9 @@ def parse_ic_info(file_path: str) -> pd.DataFrame:
             else:
                 # Currently doesn't support scan of angles.
                 ic_dict['scan'].append(False)
+    elif isinstance(log, QChemLog):
+        # Parsing IC info from QChem output is not implemented yet; skip without raising an error.
+        pass
+
     else:
         raise NotImplementedError(f'parse_ic_info() can currently only parse Gaussian output '
                                   f'files, got {log}')
diff --git a/arc/parser_test.py b/arc/parser_test.py
index 7f774d0232..e94217609f 100644
--- a/arc/parser_test.py
+++ b/arc/parser_test.py
@@ -114,7 +114,7 @@ def test_parse_frequencies(self):
 
     def test_parse_normal_mode_displacement(self):
         """Test parsing frequencies and normal mode displacements"""
         freq_path = os.path.join(ARC_PATH, 'arc', 'testing', 'freq', 'Gaussian_neg_freq.out')
-        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path)
+        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path, raise_error=False)
         expected_freqs = np.array([-18.0696, 127.6948, 174.9499, 207.585, 228.8421, 281.2939, 292.4101,
                                    308.0345, 375.4493, 486.8396, 498.6986, 537.6196, 564.0223, 615.3762,
                                    741.8843, 749.3428, 777.1524, 855.3031, 871.055, 962.7075, 977.6181,
@@ -132,7 +132,7 @@ def test_parse_normal_mode_displacement(self):
         np.testing.assert_almost_equal(normal_modes_disp[0], expected_normal_modes_disp_0)
 
         freq_path = os.path.join(ARC_PATH, 'arc', 'testing', 'freq', 'CHO_neg_freq.out')
-        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path)
+        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path, raise_error=False)
         expected_freqs = np.array([-1612.8294, 840.8655, 1883.4822, 3498.091], np.float64)
         np.testing.assert_almost_equal(freqs, expected_freqs)
         expected_normal_modes_disp_1 = np.array(
@@ -143,7 +143,7 @@ def test_parse_normal_mode_displacement(self):
         np.testing.assert_almost_equal(normal_modes_disp, expected_normal_modes_disp_1)
 
         freq_path = os.path.join(ARC_PATH, 'arc', 'testing', 'freq', 'CH3OO_freq_gaussian.out')
-        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path)
+        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path, raise_error=False)
         expected_freqs = np.array([136.4446, 494.1267, 915.7812, 1131.4603, 1159.9315, 1225.148, 1446.5652,
                                    1474.8065, 1485.6423, 3046.2186, 3134.8026, 3147.5619], np.float64)
         np.testing.assert_almost_equal(freqs, expected_freqs)
@@ -175,7 +175,7 @@ def test_parse_normal_mode_displacement(self):
         np.testing.assert_almost_equal(normal_modes_disp, expected_normal_modes_disp_2)
 
         freq_path = os.path.join(ARC_PATH, 'arc', 'testing', 'freq', 'TS_NH2+N2H3.out')
-        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path)
+        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=freq_path, raise_error=False)
         expected_freqs = np.array([-1745.4843, 64.9973, 168.1583, 234.1226, 453.2505, 657.1672, 737.7965,
                                    844.5179, 1156.12, 1177.1321, 1390.4004, 1454.281, 1565.3214, 1680.0987,
                                    3367.2838, 3512.739, 3550.219, 3652.1575], np.float64)
@@ -220,7 +220,7 @@ def test_parse_normal_mode_displacement(self):
         np.testing.assert_almost_equal(normal_modes_disp, expected_normal_modes_disp_2)
 
         path = os.path.join(ARC_PATH, 'arc', 'testing', 'normal_mode', 'HO2', 'output.out')
-        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=path)
+        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=path, raise_error=False)
         expected_freqs = np.array([1224.9751, 1355.2709, 3158.763], np.float64)
         np.testing.assert_almost_equal(freqs, expected_freqs)
         expected_normal_modes_disp_3 = np.array(
@@ -230,7 +230,7 @@ def test_parse_normal_mode_displacement(self):
         np.testing.assert_almost_equal(normal_modes_disp, expected_normal_modes_disp_3)
 
         path = os.path.join(ARC_PATH, 'arc', 'testing', 'freq', 'output.yml')
-        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=path)
+        freqs, normal_modes_disp = parser.parse_normal_mode_displacement(path=path, raise_error=False)
         self.assertEqual(freqs[-1], 3922.9230982968807)
         expected_normal_modes_disp_4_0 = np.array(
             [[0.008599578508578239, 0.01787645439208711, -0.04175706756233052],
diff --git a/arc/scheduler.py b/arc/scheduler.py
index 1d1ec71868..0a167fe105 100644
--- a/arc/scheduler.py
+++ b/arc/scheduler.py
@@ -30,6 +30,7 @@
                         torsions_to_scans,
                         )
 from arc.exceptions import (InputError,
+                            JobError,
                             SanitizationError,
                             SchedulerError,
                             SpeciesError,
@@ -924,6 +925,7 @@ def deduce_job_adapter(self, level: Level, job_type: str) -> str:
                 logger.error('Setting it to TeraChem')
                 level.software = 'terachem'
             job_adapter = level.software
+
         return job_adapter.lower()
 
     def end_job(self, job: 'JobAdapter',
@@ -944,7 +946,7 @@ def end_job(self, job: 'JobAdapter',
         if job.job_status[0] != 'done' or job.job_status[1]['status'] != 'done':
             try:
                 job.determine_job_status()  # Also downloads the output file.
-            except IOError:
+            except (IOError, JobError) as e:
                 if job.job_type not in ['orbitals']:
                     logger.warning(f'Tried to determine status of job {job.job_name}, '
                                    f'but it seems like the job never ran. Re-running job.')
@@ -986,11 +988,11 @@ def end_job(self, job: 'JobAdapter',
                 job.job_status[1]['status'] = 'errored'
                 logger.warning(f'Job {job.job_name} errored because for the second time ARC did not find the output '
                               f'file path {job.local_path_to_output_file}.')
-            elif job.job_type not in ['orbitals']:
-                job.ess_trsh_methods.append('restart_due_to_file_not_found')
-                logger.warning(f'Did not find the output file of job {job.job_name} with path '
-                               f'{job.local_path_to_output_file}. Maybe the job never ran. Re-running job.')
-                self._run_a_job(job=job, label=label)
+            if job.job_type not in ['orbitals']:
+                job.ess_trsh_methods.append('restart_due_to_file_not_found')
+                logger.warning(f'Did not find the output file of job {job.job_name} with path '
+                               f'{job.local_path_to_output_file}. Maybe the job never ran. Re-running job.')
+                self._run_a_job(job=job, label=label)
             if job_name in self.running_jobs[label]:
                 self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
             return False
@@ -1020,6 +1022,7 @@ def end_job(self, job: 'JobAdapter',
             for rotors_dict in self.species_dict[label].rotors_dict.values():
                 if rotors_dict['pivots'] in [job.pivots, job.pivots[0]]:
                     rotors_dict['scan_path'] = job.local_path_to_output_file
+        job.remove_remote_files()
         self.save_restart_dict()
         return True
 
@@ -1264,7 +1267,10 @@ def run_sp_job(self,
         recent_opt_job_name, recent_opt_job = 'opt_a0', None
         if 'opt' in self.job_dict[label].keys():
             for opt_job_name, opt_job in self.job_dict[label]['opt'].items():
-                if int(opt_job_name.split('_a')[-1]) > int(recent_opt_job_name.split('_a')[-1]):
+                if (
+                    int(opt_job_name.split('_a')[-1]) > int(recent_opt_job_name.split('_a')[-1])
+                    # This needs to be checked: this function previously did not consider whether the opt job is done.
+                    # Maybe it shouldn't need to, but rather this is a result of Zeus creating submission issues.
+                    and opt_job.job_status[1]['status'] == 'done'
+                ):
                     recent_opt_job_name, recent_opt_job = opt_job_name, opt_job
             if recent_opt_job is not None:
                 recent_opt_job.rename_output_file()
@@ -2487,6 +2493,8 @@ def check_freq_job(self,
             if not freq_ok:
                 self.output[label]['warnings'] += wrong_freq_message
             if job.job_status[1]['status'] != 'done' or (not freq_ok and not self.species_dict[label].is_ts):
+                # TODO: What if QChem finished without error and converged, but we decide the frequencies are not
+                #       okay? How do we expect troubleshooting to handle this?
+                # TODO: Example: r_9_[CH]=O - freq2768 from NN_ARC scans 1-10
                 self.troubleshoot_ess(label=label, job=job, level_of_theory=job.level)
             if (job.job_status[1]['status'] == 'done' and freq_ok and not switch_ts
                     and species_has_sp(self.output[label], self.species_dict[label].yml_path)):
@@ -3337,19 +3345,23 @@ def troubleshoot_opt_jobs(self, label):
                 else:
                     trsh_opt = True
                     # job passed on the server, but failed in ESS calculation
-                    if previous_job_num >= 0 and job.fine:
-                        previous_job = self.job_dict[label]['opt']['opt_a' + str(previous_job_num)]
-                        if not previous_job.fine and previous_job.job_status[0] == 'done' \
-                                and previous_job.job_status[1]['status'] == 'done':
-                            # The present job with a fine grid failed in the ESS calculation.
-                            # A *previous* job without a fine grid terminated successfully on the server and ESS.
-                            # So use the xyz determined w/o the fine grid, and output an error message to alert users.
-                            logger.error(f'Optimization job for {label} with a fine grid terminated successfully '
-                                         f'on the server, but crashed during calculation. NOT running with fine '
-                                         f'grid again.')
-                            self.parse_opt_geo(label=label, job=previous_job)
-                            trsh_opt = False
-                    if trsh_opt:
+                    if job.times_rerun > 0 and job.fine and job.job_status[1]['status'] == 'errored':
+                        # We've already tried troubleshooting this job, so don't try again.
+                        trsh_opt = False
+                        if previous_job_num >= 0 and job.fine:
+                            previous_job = self.job_dict[label]['opt']['opt_a' + str(previous_job_num)]
+                            if not previous_job.fine and previous_job.job_status[0] == 'done' \
+                                    and previous_job.job_status[1]['status'] == 'done':
+                                # The present job with a fine grid failed in the ESS calculation.
+                                # A *previous* job without a fine grid terminated successfully on the server and ESS.
+                                # So use the xyz determined w/o the fine grid, and output an error message to alert users.
+                                logger.error(f'Optimization job for {label} with a fine grid terminated successfully '
+                                             f'on the server, but crashed during calculation. NOT running with fine '
+                                             f'grid again.')
+                                self.parse_opt_geo(label=label, job=previous_job)
+                                trsh_opt = False
+                    elif trsh_opt:
+                        job.times_rerun += 1
                         self.troubleshoot_ess(label=label, job=job, level_of_theory=self.opt_level)
diff --git a/arc/settings/settings.py b/arc/settings/settings.py
index 5d5d86d5fe..e3d38539c3 100644
--- a/arc/settings/settings.py
+++ b/arc/settings/settings.py
@@ -54,6 +54,15 @@
                        'un': '',
                        'key': 'path_to_rsa_key',
                        },
+           'azure': {
+                     'cluster_soft': 'Slurm',
+                     'address': 'X.X.X.X',
+                     'un': '[username]',
+                     'key': '/home/[username]/.ssh/ubuntu-image_key.pem',
+                     'cpus': 16,
+                     'memory': 60,
+                     'path': '/mount/nfsshareslurm/nfs/',
+                     },
            'local': {
                      'cluster_soft': 'HTCondor',
                      'un': '',
@@ -70,10 +79,10 @@
                       'cfour': 'local',
                       'gaussian': ['local', 'server2'],
                       'gcn': 'local',
-                      'molpro': ['local', 'server2'],
+                      'molpro': ['local', 'server2', 'azure'],
                       'onedmin': 'server1',
-                      'orca': 'local',
-                      'qchem': 'server1',
+                      'orca': ['local', 'azure'],
+                      'qchem': ['local', 'azure'],
                       'terachem': 'server1',
                       'xtb': 'local',
                       'xtb_gsm': 'local',
@@ -164,7 +173,7 @@
 output_filenames = {'cfour': 'output.out',
                     'gaussian': 'input.log',
                     'gcn': 'output.yml',
-                    'molpro': 'input.out',
+                    'molpro': 'output.out',
                     'onedmin': 'output.out',
                     'orca': 'input.log',
                     'qchem': 'output.out',
diff --git a/arc/settings/submit.py b/arc/settings/submit.py
index 58bf1577a5..2ee5237916 100644
--- a/arc/settings/submit.py
+++ b/arc/settings/submit.py
@@ -132,7 +132,7 @@
 #SBATCH -p long
 #SBATCH -J {name}
 #SBATCH -N 1
-#SBATCH -n {cpus}
+#SBATCH --ntasks-per-node={cpus}
 #SBATCH --time={t_max}
 #SBATCH --mem-per-cpu={memory}
 #SBATCH -o out.txt
@@ -158,7 +158,7 @@
 
 cp "$SubmitDir/input.in" .
 
-molpro -n {cpus} -d $sdir input.in
+molpro -n {cpus} -t {cpus} -d $sdir input.in
 
 cp input.* "$SubmitDir/"
 cp geometry*.* "$SubmitDir/"
@@ -285,6 +285,33 @@
 
 touch final_time
 
+""",
+    'qchem': """#!/bin/bash -l
+#SBATCH -p long
+#SBATCH -J {name}
+#SBATCH -N 1
+#SBATCH --cpus-per-task={cpus}
+#SBATCH --time={t_max}
+#SBATCH --mem-per-cpu={memory}
+#SBATCH -o out.txt
+#SBATCH -e err.txt
+
+. /opt/qchem/qchem_env.sh
+
+echo "============================================================"
+echo "Job ID : $SLURM_JOB_ID"
+echo "Job Name : $SLURM_JOB_NAME"
+echo "Starting on : $(date)"
+echo "Running on node : $SLURMD_NODENAME"
+echo "Current directory : $(pwd)"
+echo "============================================================"
+
+touch initial_time
+
+qchem -nt {cpus} input.in output.out
+
+touch final_time
+
 """,
 
 },
@@ -456,7 +483,7 @@
 source /opt/qchem/qcenv.sh
 
 export QC=/opt/qchem
-export QCSCRATCH=/scratch/{un}/{name}
+export QCSCRATCH=$PWD
 export QCLOCALSCR=/scratch/{un}/{name}/qlscratch
 
 . $QC/qcenv.sh
@@ -855,6 +882,148 @@
 
 """,
 },
 
+    'azure': {
+        'qchem': """#!/bin/bash -l
+#SBATCH -p hpc,htc
+#SBATCH -J {name}
+#SBATCH --account={un}
+#SBATCH -N 1
+#SBATCH --cpus-per-task={cpus}
+#SBATCH --mem={memory}
+#SBATCH -o out.txt
+#SBATCH -e err.txt
+
+# Load the QChem module
+module load easybuild/EasyBuild
+module load QChem-6.1
+
+# Get the current directory
+export CWD="{pwd}"
+
+# Set up the scratch directory
+export SCRATCH=/mnt/{un}/scratch/$SLURM_JOB_ID
+if [ -d $SCRATCH ]; then
+    sudo rm -vrf $SCRATCH
+fi
+sudo mkdir -p $SCRATCH
+
+# Change permissions
+sudo chmod 777 $SCRATCH
+export QCSCRATCH=$SCRATCH
+
+echo "============================================================"
+echo "Job ID : $SLURM_JOB_ID"
+echo "Job Name : $SLURM_JOB_NAME"
+echo "Starting on : $(date)"
+echo "Running on node : $SLURMD_NODENAME"
+echo "Current directory : {pwd}"
+echo "============================================================"
+
+# Due to a bug in the Intel MKL library, we need to set the following environment variable
+export MKL_DEBUG_CPU_TYPE=5
+
+# Set up the run directory for {un} on the VM storage
+export QC_RUN=/mnt/{un}/$SLURM_JOB_ID
+if [ -d $QC_RUN ]; then
+    sudo rm -vrf $QC_RUN
+fi
+sudo mkdir -p $QC_RUN
+sudo chmod 777 $QC_RUN
+
+# Now, copy the input file to the VM storage
+cp input.in $QC_RUN
+
+# Create a file to measure the time of execution
+touch initial_time
+
+# Change directory to the VM storage
+cd $QC_RUN
+
+# Run QChem
+qchem -slurm -nt {cpus} input.in output.out
+
+# Remove the scratch directory
+sudo rm -vrf $SCRATCH
+
+# Copy all the files back to the original directory
+cp -vfr $QC_RUN/* "{pwd}"
+
+# Change directory back to the original directory
+cd "{pwd}"
+
+# Create a file to measure the time of execution
+touch final_time
+
+""",
+        'molpro': """#!/bin/bash -l
+#SBATCH -p hpc,htc
+#SBATCH -J {name}
+#SBATCH --account={un}
+#SBATCH -N 1
+#SBATCH --ntasks-per-node={cpus}
+#SBATCH --mem={memory}
+#SBATCH -o out.txt
+#SBATCH -e err.txt
+
+# Load the Molpro module
+module load easybuild/EasyBuild
+module load Molpro-2022.3.1
+
+# Set up the scratch directory
+export SCRATCH=/mnt/{un}/scratch/molpro/{name}
+if [ -d $SCRATCH ]; then
+    sudo rm -rf $SCRATCH
+fi
+sudo mkdir -p $SCRATCH
+sudo chmod 777 $SCRATCH
+export MOLPRO_TMPDIR=$SCRATCH
+
+echo "============================================================"
+echo "Job ID : $SLURM_JOB_ID"
+echo "Job Name : $SLURM_JOB_NAME"
+echo "Starting on : $(date)"
+echo "Running on node : $SLURMD_NODENAME"
+echo "Current directory : $(pwd)"
+echo "============================================================"
+
+# Create a file to measure the time of execution
+touch initial_time
+
+# Get the current directory
+export CWD={pwd}
+
+# Create a run directory on the VM storage
+export MOLPRO_RUN=/mnt/{un}/molpro/$SLURM_JOB_ID
+sudo mkdir -p $MOLPRO_RUN
+sudo chmod 777 $MOLPRO_RUN
+
+# Now, copy the input file to the VM storage
+cp input.in $MOLPRO_RUN
+
+# Change directory to the VM storage
+cd $MOLPRO_RUN
+
+# Run Molpro
+molpro -n{cpus} -d $MOLPRO_TMPDIR input.in -o output.out
+
+sudo rm -rf $MOLPRO_TMPDIR
+
+# Copy all the files back to the original directory
+cp -vfr $MOLPRO_RUN/* {pwd}
+
+# Change directory back to the original directory
+cd {pwd}
+
+# Create a file to measure the time of execution
+touch final_time
+
+""",
+    },
 'server3': {
     'gaussian': """#!/bin/bash -l
 #SBATCH -p normal
diff --git a/arc/settings/submit_test.py b/arc/settings/submit_test.py
index 08ca08ed01..ac2700b0bc 100644
--- a/arc/settings/submit_test.py
+++ b/arc/settings/submit_test.py
@@ -18,7 +18,7 @@ class TestSubmit(unittest.TestCase):
 
     def test_servers(self):
         """Test server keys in submit_scripts"""
         for server in submit_scripts.keys():
-            self.assertTrue(server in ['local', 'atlas', 'txe1', 'pbs_sample', 'server1', 'server2', 'server3'])
+            self.assertTrue(server in ['local', 'atlas', 'txe1', 'pbs_sample', 'server1', 'server2', 'azure', 'server3'])
 
 
 if __name__ == '__main__':
diff --git a/arc/species/converter.py b/arc/species/converter.py
index 9b943f2a2a..7398f1f671 100644
--- a/arc/species/converter.py
+++ b/arc/species/converter.py
@@ -49,9 +49,7 @@
 logger = get_logger()
 
 
-def str_to_xyz(xyz_str: str,
-               project_directory: Optional[str] = None,
-               ) -> dict:
+def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None) -> dict:
     """
     Convert a string xyz format to the ARC dict xyz style.
     Note: The ``xyz_str`` argument could also direct to a file path to parse the data from.
@@ -94,6 +92,10 @@ def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None) -> dict:
     if os.path.isfile(xyz_str):
         from arc.parser import parse_xyz_from_file
         return parse_xyz_from_file(xyz_str)
+    elif project_directory is not None and os.path.isfile(os.path.join(project_directory, xyz_str)):
+        from arc.parser import parse_xyz_from_file
+        xyz_str = os.path.join(project_directory, xyz_str)
+        return parse_xyz_from_file(xyz_str)
     xyz_str = xyz_str.replace(',', ' ')
     if len(xyz_str.splitlines()) and len(xyz_str.splitlines()[0]) == 1:
         # this is a zmat
@@ -675,9 +677,7 @@ def standardize_xyz_string(xyz_str, isotope_format=None):
     return xyz_to_str(xyz_dict=xyz_dict, isotope_format=isotope_format)
 
 
-def check_xyz_dict(xyz: Union[dict, str],
-                   project_directory: Optional[str] = None,
-                   ) -> Optional[dict]:
+def check_xyz_dict(xyz: Union[dict, str], project_directory: Optional[str] = None) -> Optional[dict]:
     """
     Check that the xyz dictionary entered is valid.
     If it is a string, convert it.
@@ -687,7 +687,7 @@ def check_xyz_dict(xyz: Union[dict, str], project_directory: Optional[str] = None) -> Optional[dict]:
 
     Args:
         xyz (Union[dict, str]): The xyz dictionary.
-        project_directory (str, optional): The path to the project directory.
+        project_directory (str, optional): The project directory path.
 
     Raises:
         ConverterError: If ``xyz`` is of wrong type or is missing symbols or coords.
@@ -2092,6 +2092,60 @@ def ics_to_scan_constraints(ics: list,
             elif len(ic) == 4:
                 scan_trsh += 'D '
             scan_trsh += ''.join([str(num) + ' ' for num in ic]) + 'F\n'
+
+    elif software == 'qchem':
+        # scan_trsh += 'CONSTRAINT\n'
+        # Q-Chem $opt CONSTRAINT syntax:
+        #
+        # Interatomic distances (values in Ångstroms, value > 0):
+        #     stre atom1 atom2 value
+        # Angles (values in degrees, 0 <= value <= 180; atom2 is the middle atom of the bend):
+        #     bend atom1 atom2 atom3 value
+        # Out-of-plane bends (values in degrees, -180 <= value <= 180; the angle is between atom4
+        # and the atom1-atom2-atom3 plane, with atom2 as the central atom):
+        #     outp atom1 atom2 atom3 atom4 value
+        # Dihedral angles (values in degrees, -180 <= value <= 180; the angle the plane
+        # atom1-atom2-atom3 makes with the plane atom2-atom3-atom4):
+        #     tors atom1 atom2 atom3 atom4 value
+        #
+        # The full block has the form:
+        #     CONSTRAINT
+        #     stre atom1 atom2 value
+        #     ...
+        #     bend atom1 atom2 atom3 value
+        #     ...
+        #     outp atom1 atom2 atom3 atom4 value
+        #     ...
+        #     tors atom1 atom2 atom3 atom4 value
+        #     ...
+        #     linc atom1 atom2 atom3 atom4 value
+        #     ...
+        #     linp atom1 atom2 atom3 atom4 value
+        #     ...
+        #     ENDCONSTRAINT
+        for ic in ics:
+            # Map the constraint type from the number of atoms in the internal coordinate
+            if len(ic) == 2:
+                scan_trsh += 'stre '
+            elif len(ic) == 3:
+                scan_trsh += 'bend '
+            elif len(ic) == 4:
+                scan_trsh += 'tors '
+            scan_trsh += ''.join([str(num) + ' ' for num in ic]) + '\n'  # + 'ENDCONSTRAINT\n'
+
     else:
         raise NotImplementedError(f'Given software {software} is not implemented '
                                   f'for ics_to_scan_constraints().')
diff --git a/arc/species/species.py b/arc/species/species.py
index 2ae0968a10..ba56662c2d 100644
--- a/arc/species/species.py
+++ b/arc/species/species.py
@@ -2079,7 +2079,7 @@ def __init__(self,
         self.execution_time = execution_time if execution_time is not None else execution_time
         self._opt_xyz = None
         self._initial_xyz = None
-        self.process_xyz(xyz, project_directory=project_directory)  # populates self.initial_xyz
+        self.process_xyz(xyz, project_directory)  # populates self.initial_xyz
         self.success = success
         self.energy = energy
         self.cluster = cluster
@@ -2233,10 +2233,7 @@ def from_dict(self, ts_dict: dict):
             except AtomTypeError:
                 pass
 
-    def process_xyz(self,
-                    xyz: Union[dict, str],
-                    project_directory: Optional[str] = None,
-                    ):
+    def process_xyz(self, xyz: Union[dict, str], project_directory: Optional[str] = None):
         """
         Process the user's input.
         If ``xyz`` represents a file path, parse it.
@@ -2250,7 +2247,7 @@ def process_xyz(self, xyz: Union[dict, str], project_directory: Optional[str] = None):
         if xyz is not None:
             if not isinstance(xyz, (dict, str)):
                 raise InputError(f'xyz must be either a dictionary or string, got:\n{xyz}\nwhich is a {type(xyz)}')
-            self.initial_xyz = check_xyz_dict(xyz, project_directory=project_directory)
+            self.initial_xyz = check_xyz_dict(xyz, project_directory)
diff --git a/arc/species/vectors.py b/arc/species/vectors.py
index 99fe05e5b7..ad85fd66f1 100644
--- a/arc/species/vectors.py
+++ b/arc/species/vectors.py
@@ -5,6 +5,7 @@
 import math
 import numpy as np
 from typing import List, Union
+import re
 
 from rmgpy.molecule.molecule import Molecule
@@ -205,6 +206,12 @@ def calculate_dihedral_angle(coords: Union[list, tuple, dict],
     """
     if isinstance(coords, dict) and 'coords' in coords:
         coords = coords['coords']
+    if isinstance(coords, str):
+        try:
+            lines = coords.split('\n')
+            coords = tuple(tuple(float(x) for x in re.findall(r'[+-]?\d+\.\d+', line))
+                           for line in lines if re.search(r'[A-Za-z]', line))
+        except Exception as e:
+            raise TypeError(f'Could not read coords from string\n{coords}\nGot error:\n{e}')
     if not isinstance(coords, (list, tuple)):
         raise TypeError(f'coords must be a list or a tuple, got\n{coords}\nwhich is a {type(coords)}')
     if index not in [0, 1]:
diff --git a/arc/species/xyz_to_smiles.py b/arc/species/xyz_to_smiles.py
index b5334910c5..5b6a3a2304 100644
--- a/arc/species/xyz_to_smiles.py
+++ b/arc/species/xyz_to_smiles.py
@@ -497,7 +497,7 @@ def ac2bo(atom_connectivity: np.ndarray,
         # The valence cannot be smaller than the number of neighbours.
         possible_valence = [x for x in atomic_valence[atomic_num] if x >= valence]
         if not possible_valence:
-            logger.warning(f'Valence of atom {i} is {valence}, which bigger than the allowed max '
+            logger.warning(f'Valence of atom {atoms[i]} with index {i} is {valence}, which is bigger than the allowed maximum '
                            f'{max(atomic_valence[atomic_num])}. Stopping')
             return None, None
         valences_list_of_lists.append(possible_valence)
diff --git a/data/basis_sets.csv b/data/basis_sets.csv
new file mode 100644
index 0000000000..a17955775c
--- /dev/null
+++ b/data/basis_sets.csv
@@ -0,0 +1,142 @@
+software,basis_set,atoms_supported
+qchem,RIJ-def2-SVP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJ-def2-TZVP,"H, He, Li → Ne, Na → Ar"
+qchem,RIJ-def2-TZVPP,"H, He, Li → Ne, Na → Ar"
+qchem,RIJ-def2-TZVPPd,"H, He, Li → Ne, Na → Ar"
+qchem,RIJ-def2-QZVP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJ-def2-QZVPP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJ-def2-QZVPPd,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJ-def2-cc-pVdZ,"H, He, Li → Ne, Na → Ar"
+qchem,RIJ-def2-cc-pVTZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJ-def2-cc-pVQZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJ-def2-aug-cc-pVdZ,"H, He, Li → Ne, Na → Ar"
+qchem,RIJ-def2-aug-cc-pVTZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJ-def2-aug-cc-pVQZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJK-def2-SVP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJK-def2-TZVP,"H, He, Li → Ne, Na → Ar"
+qchem,RIJK-def2-TZVPP,"H, He, Li → Ne, Na → Ar"
+qchem,RIJK-def2-TZVPPd,"H, He, Li → Ne, Na → Ar"
+qchem,RIJK-def2-QZVP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJK-def2-QZVPP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJK-def2-QZVPPd,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIJK-def2-cc-pVdZ,"H, He, Li → Ne, Na → Ar"
+qchem,RIJK-def2-cc-pVTZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJK-def2-cc-pVQZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJK-def2-aug-cc-pVdZ,"H, He, Li → Ne, Na → Ar"
+qchem,RIJK-def2-aug-cc-pVTZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIJK-def2-aug-cc-pVQZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-SVP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIMP2-def2-TZVP,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-TZVPP,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-TZVPPd,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-QZVP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIMP2-def2-QZVPP,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIMP2-def2-QZVPPd,"H, He, Li → Ne, Na → Ar, K → Br, Rb → Xe, Cs → La, Hf → Rn"
+qchem,RIMP2-def2-cc-pVdZ,"H, He, Li → Ne, Na → Ar, K → Br"
+qchem,RIMP2-def2-cc-pVTZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-cc-pVQZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-aug-cc-pVdZ,"H, He, Li → Ne, Na → Ar, K → Br"
+qchem,RIMP2-def2-aug-cc-pVTZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,RIMP2-def2-aug-cc-pVQZ,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,def2-mSVP,"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-SV(P),"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-SVP,"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-ma-SVP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ha-SVP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-SVPD,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-TZVP,"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-TZVPP,"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-ma-TZVP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ma-TZVPP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ha-TZVP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ha-TZVPP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-TZVPD,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-TZVPPD,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-QZVP,"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-QZVPP,"H–Kr, Rb–Rn (with def2-ECP)"
+qchem,def2-ma-QZVP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ma-QZVPP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ha-QZVP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-ha-QZVPP,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-QZVPD,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,def2-QZVPPD,"H–Kr; Rb–La, Hf–Rn (with def2-ECP)"
+qchem,STO-2G,"H, He, Li → Ne, Na → Ar, K, Ca, Sr"
+qchem,STO-3G,"H, He, Li → Ne, Na → Ar, K → Kr, Rb → I"
+qchem,STO-6G,"H, He, Li → Ne, Na → Ar, K → Kr"
+qchem,3-21G,"H, He, Li → Ne, Na → Ar, K → Kr, Rb → Xe, Cs"
+qchem,4-31G,"H, He, Li → Ne, P → Cl"
+qchem,6-311G,"H, He, Li → Ne, Na → Ar, Ga → I"
+qchem,G3LARGE,"H, He, Li → Ne, Na → Ar, K → Kr"
+qchem,G3MP2LARGE,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,3-21+G,"H, He, Na → Cl, Na → Ar, K, Ca, Ga → Kr"
+qchem,3-21G*,Na → Ar
+qchem,6-31G,"H, He, Li → Ne, Na → Ar, K → Zn, Ga → Kr"
+qchem,6-31+G,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,6-31G*,"H, He, Li → Ne, Na → Ar, K → Zn, Ga → Kr"
+qchem,"6-31G(d,p)","H, He, Li → Ne, Na → Ar, K → Zn, Ga → Kr"
+qchem,"6-31G(.,+)G","H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,6-31+G*,"H, He, Li → Ne, Na → Ar, Ga → Kr"
+qchem,6-311+G,"H, He, Li → Ne, Na → Ar"
+qchem,6-311G*,"H, He, Li → Ne, Na → Ar, Ga → I"
+qchem,"6-311G(d,p)","H, He, Li → Ne, Na → Ar, Ga → I"
+qchem,cc-pVDZ,"H → Ar, Ca, Ga → Kr"
+qchem,cc-pVDZ-full,"H → Ar, Ca → Kr"
+qchem,cc-pVDZ-PP,Cu → Rn
+qchem,cc-pVTZ,"H → Ar, Ca, Ga → Kr"
+qchem,cc-pVTZ-full,"H → Ar, Ca → Kr"
+qchem,cc-pVTZ-PP,Cu → Rn
+qchem,cc-pVQZ,"H → Ar, Ca, Ga → Kr"
+qchem,cc-pVQZ-full,"H → Ar, Ca → Kr"
+qchem,cc-pVQZ-PP,Cu → Rn
+qchem,cc-pV5Z,"H → Ar, Ca → Kr"
+qchem,cc-pV6Z,"H → Ar except Li, Na, Mg"
+qchem,cc-pCVDZ,"H → Ar, Ca (H and He use cc-pVDZ)"
+qchem,cc-pCVTZ,"H → Ar, Ca (H and He use cc-pVTZ)"
+qchem,cc-pCVQZ,"H → Ar, Ca (H and He use cc-pVQZ)"
+qchem,cc-pCV5Z,"H, He, B → Ar, Ca (H and He use cc-pV5Z)"
+qchem,cc-pwCVDZ,"B → Ne, Al → Ar"
+qchem,cc-pwCVTZ,"B → Ne, Al → Ar, Sc → Zn"
+qchem,cc-pwCVQZ,"B → Ne, Al → Ar, Sc → Zn, Br"
+qchem,cc-pwCVDZ-PP,Cu → Rn
+qchem,cc-pwCVTZ-PP,Cu → Rn
+qchem,cc-pwCVQZ-PP,Cu → Rn
+qchem,aug-cc-pVDZ,H → Kr
+qchem,aug-cc-pVDZ-PP,Cu → Rn
+qchem,aug-cc-pVTZ,H → Kr
+qchem,aug-cc-pVTZ-PP,Cu → Rn
+qchem,aug-cc-pVQZ,H → Kr
+qchem,aug-cc-pVQZ-PP,Cu → Rn
+qchem,aug-cc-pV5Z,"H → Ar, Sc → Kr"
+qchem,aug-cc-pV6Z,"H → Ar except Li, Be, Na, Mg"
+qchem,aug-cc-pCVDZ,H → Ar (H and He use aug-cc-pVDZ)
+qchem,aug-cc-pCVTZ,H → Ar (H and He use aug-cc-pVTZ)
+qchem,aug-cc-pCVQZ,H → Ar (H and He use aug-cc-pVQZ)
+qchem,aug-cc-pCV5Z,"H, He, B → Ar (H and He use aug-cc-pV5Z)"
+qchem,aug-cc-pwCVDZ,"B → Ne, Al → Ar"
+qchem,aug-cc-pwCVTZ,"B → Ne, Al → Ar, Sc → Zn"
+qchem,aug-cc-pwCVQZ,"B → Ne, Al → Ar, Sc → Zn, Br"
+qchem,aug-cc-pwCVDZ-PP,Cu → Rn
+qchem,aug-cc-pwCVTZ-PP,Cu → Rn
+qchem,aug-cc-pwCVQZ-PP,Cu → Rn
+qchem,may-cc-p(C)VXZ,
+qchem,jun-cc-p(C)VXZ,
+qchem,jul-cc-p(C)VXZ,
+qchem,jun-cc-pVXZ-PP,
+qchem,jun-cc-p(w)VXZ,
+qchem,jun-cc-p(w)VXZ-PP,
+qchem,TZV,H → Kr
+qchem,VDZ,H → Kr
+qchem,VTZ,H → Kr
+qchem,pcseg-n,H → Kr
+qchem,pc-n,H → Kr
+qchem,pcJ-n,H → Ar
+qchem,psS-n,H → Ar
+qchem,aug-pcseg-n,H → Kr
+qchem,aug-pc-n,H → Kr
+qchem,aug-pcJ-n,Ar
+qchem,aug-psS-n,Ar
+qchem,"6-311++G(3df,3pd)",
+qchem,"6-311G(3df,3pd)",
+qchem,"6-311+G(3df,3pd)",
+qchem,"6-311+G(d,p)",
diff --git a/environment.yml b/environment.yml
index 269423b638..c0beecd246 100644
--- a/environment.yml
+++ b/environment.yml
@@ -59,3 +59,4 @@ dependencies:
   - pytables
   - anaconda::pytest
   - conda-forge::pytest-cov
+  - conda-forge::rapidfuzz
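This patch adds both data/basis_sets.csv and a conda-forge::rapidfuzz dependency. A plausible pairing of the two (an assumption - the wiring between them is not shown in this diff) is fuzzy-matching a user-supplied basis-set name against the CSV before dispatching a job. A minimal sketch, using a hypothetical helper name:

    import csv

    from rapidfuzz import process

    def closest_basis_set(name: str, csv_path: str = 'data/basis_sets.csv') -> str:
        """Return the known basis set whose name best matches ``name`` (hypothetical helper)."""
        with open(csv_path) as f:
            known = [row['basis_set'] for row in csv.DictReader(f)]
        match, score, _ = process.extractOne(name, known)
        return match

    # e.g., closest_basis_set('def2tzvp') could return 'def2-TZVP'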