diff --git a/.conda/meta.yaml b/.conda/meta.yaml index b1df7678e6..7cd02969fe 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -30,7 +30,6 @@ requirements: - cairocffi - cantera >=2.3.0 - cclib >=1.6.3 - - chemprop - coolprop - coverage - cython >=0.25.2 diff --git a/arkane/common.py b/arkane/common.py index 5227298895..a706417b88 100644 --- a/arkane/common.py +++ b/arkane/common.py @@ -191,11 +191,11 @@ def update_species_attributes(self, species=None): self.multiplicity = species.molecule[0].multiplicity self.formula = species.molecule[0].get_formula() try: - inchi = to_inchi(species.molecule[0], backend='try-all', aug_level=0) + inchi = to_inchi(species.molecule[0], backend='openbabel-first', aug_level=0) except ValueError: inchi = '' try: - inchi_key = to_inchi_key(species.molecule[0], backend='try-all', aug_level=0) + inchi_key = to_inchi_key(species.molecule[0], backend='openbabel-first', aug_level=0) except ValueError: inchi_key = '' self.inchi = inchi diff --git a/environment.yml b/environment.yml index 4596f580ee..97b22265fe 100644 --- a/environment.yml +++ b/environment.yml @@ -14,6 +14,8 @@ # Changelog: # - May 15, 2023 Added this changelog, added inline documentation, # made dependency list more explicit (@JacksonBurns). 
+# - October 16, 2023 Switched RDKit and descriptastorus to conda-forge, +# moved diffeqpy to pip and (temporarily) removed chemprop # name: rmg_env channels: @@ -44,6 +46,7 @@ dependencies: - conda-forge::mopac - conda-forge::cclib >=1.6.3,!=1.8.0 - conda-forge::openbabel >= 3 + - conda-forge::rdkit >=2022.09.1 # general-purpose external software tools - conda-forge::julia=1.9.1 @@ -54,7 +57,7 @@ dependencies: - coverage - cython >=0.25.2 - scikit-learn - - scipy + - scipy <1.11 - numpy >=1.10.0 - pydot - jinja2 @@ -94,14 +97,8 @@ dependencies: # rather than ours (which is only made so that we can get it from conda) # It is only on pip, so we will need to do something like: # https://stackoverflow.com/a/35245610 - - - rmg::chemprop - # Our build of this is version 0.0.1 (!!) and we are using parts - # of the API that are now gone. Need a serious PR to fix this. - - - rmg::rdkit >=2020.03.3.0 - # We should use the official channel, not sure how difficult this - # change will be. + # Note that _some other_ dep. in this list requires diffeqpy in its recipe + # which will cause it to be downloaded from the rmg conda channel # conda mutex metapackage - nomkl diff --git a/rmgpy/ml/estimator.py b/rmgpy/ml/estimator.py index 84cafb651c..81e1d9e701 100644 --- a/rmgpy/ml/estimator.py +++ b/rmgpy/ml/estimator.py @@ -32,10 +32,10 @@ from argparse import Namespace from typing import Callable, Union +chemprop = None try: import chemprop except ImportError as e: - chemprop = None chemprop_exception = e import numpy as np @@ -43,6 +43,16 @@ from rmgpy.species import Species from rmgpy.thermo import ThermoData +ADMONITION = """ +Support for predicting thermochemistry using chemprop has been temporarily removed +from RMG, pending official chemprop support for Python 3.11 and newer. + +To use chemprop and RMG, install a previous version of RMG (3.1.1 or earlier). 
+ +See the link below for status of re-integration of chemprop: +https://github.com/ReactionMechanismGenerator/RMG-Py/issues/2559 +""" + class MLEstimator: """ @@ -118,7 +128,7 @@ def load_estimator(model_dir: str) -> Callable[[str], np.ndarray]: if chemprop is None: # Delay chemprop ImportError until we actually try to use it # so that RMG can load successfully without chemprop. - raise chemprop_exception + raise RuntimeError(ADMONITION + "\nOriginal Exception:\n" + str(chemprop_exception)) args = Namespace() # Simple class to hold attributes diff --git a/rmgpy/molecule/molecule.pxd b/rmgpy/molecule/molecule.pxd index 4590bbed6b..f227e5d533 100644 --- a/rmgpy/molecule/molecule.pxd +++ b/rmgpy/molecule/molecule.pxd @@ -224,14 +224,14 @@ cdef class Molecule(Graph): bint raise_charge_exception=?, bint check_consistency=?) cpdef from_xyz(self, np.ndarray atomic_nums, np.ndarray coordinates, float critical_distance_factor=?, bint raise_atomtype_exception=?) - - cpdef str to_inchi(self) - cpdef str to_augmented_inchi(self) + cpdef str to_inchi(self, str backend=?) + + cpdef str to_augmented_inchi(self, str backend=?) - cpdef str to_inchi_key(self) + cpdef str to_inchi_key(self, str backend=?) - cpdef str to_augmented_inchi_key(self) + cpdef str to_augmented_inchi_key(self, str backend=?) cpdef str to_smiles(self) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index e93f0092eb..21e5007c4d 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1768,9 +1768,13 @@ def _repr_png_(self): os.unlink(temp_file_name) return png - def from_inchi(self, inchistr, backend='try-all', raise_atomtype_exception=True): + def from_inchi(self, inchistr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert an InChI string `inchistr` to a molecular structure. + + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. 
The available options for the ``backend`` + argument: 'openbabel-first'(default), 'rdkit-first', 'rdkit', or 'openbabel'. """ translator.from_inchi(self, inchistr, backend, raise_atomtype_exception=raise_atomtype_exception) return self @@ -1782,9 +1786,13 @@ def from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=True): translator.from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=raise_atomtype_exception) return self - def from_smiles(self, smilesstr, backend='try-all', raise_atomtype_exception=True): + def from_smiles(self, smilesstr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert a SMILES string `smilesstr` to a molecular structure. + + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. The available options for the ``backend`` + argument: 'openbabel-first'(default), 'rdkit-first', 'rdkit', or 'openbabel'. """ translator.from_smiles(self, smilesstr, backend, raise_atomtype_exception=raise_atomtype_exception) return self @@ -1863,62 +1871,78 @@ def to_single_bonds(self, raise_atomtype_exception=True): new_mol.update_atomtypes(raise_exception=raise_atomtype_exception) return new_mol - def to_inchi(self): + def to_inchi(self, backend='rdkit-first'): """ Convert a molecular structure to an InChI string. Uses `RDKit `_ to perform the conversion. Perceives aromaticity. - + or - + Convert a molecular structure to an InChI string. Uses `OpenBabel `_ to perform the conversion. + + It is possible to use a single backend or try different backends in sequence. + The available options for the ``backend`` argument: 'rdkit-first'(default), + 'openbabel-first', 'rdkit', or 'openbabel'. 
""" try: - return translator.to_inchi(self) + return translator.to_inchi(self, backend=backend) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise - def to_augmented_inchi(self): + def to_augmented_inchi(self, backend='rdkit-first'): """ Adds an extra layer to the InChI denoting the multiplicity of the molecule. - + Separate layer with a forward slash character. + + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. The available options for the ``backend`` + argument: 'rdkit-first'(default), 'openbabel-first', 'rdkit', or 'openbabel'. """ try: - return translator.to_inchi(self, aug_level=2) + return translator.to_inchi(self, backend=backend, aug_level=2) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise - def to_inchi_key(self): + def to_inchi_key(self, backend='rdkit-first'): """ Convert a molecular structure to an InChI Key string. Uses `OpenBabel `_ to perform the conversion. - - or - + + or + Convert a molecular structure to an InChI Key string. Uses `RDKit `_ to perform the conversion. + + It is possible to use a single backend or try different backends in sequence. + The available options for the ``backend`` argument: 'rdkit-first'(default), + 'openbabel-first', 'rdkit', or 'openbabel'. """ try: - return translator.to_inchi_key(self) + return translator.to_inchi_key(self, backend=backend) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise - def to_augmented_inchi_key(self): + def to_augmented_inchi_key(self, backend='rdkit-first'): """ Adds an extra layer to the InChIKey denoting the multiplicity of the molecule. Simply append the multiplicity string, do not separate by a character like forward slash. + + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. 
The available options for the ``backend`` + argument: 'rdkit-first'(default), 'openbabel-first', 'rdkit', or 'openbabel'. """ try: - return translator.to_inchi_key(self, aug_level=2) + return translator.to_inchi_key(self, backend=backend, aug_level=2) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 1fd0cd2220..731d8d8d8e 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -169,7 +169,7 @@ def to_inchi(mol, backend='rdkit-first', aug_level=0): Uses RDKit or OpenBabel for conversion. Args: - backend choice of backend, 'try-all', 'rdkit', or 'openbabel' + backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel' aug_level level of augmentation, 0, 1, or 2 """ cython.declare(inchi=str, ulayer=str, player=str, mlayer=str) @@ -205,7 +205,7 @@ def to_inchi_key(mol, backend='rdkit-first', aug_level=0): Uses RDKit or OpenBabel for conversion. Args: - backend choice of backend, 'try-all', 'rdkit', or 'openbabel' + backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel' aug_level level of augmentation, 0, 1, or 2 """ cython.declare(key=str, ulayer=str, player=str, mlayer=str) @@ -274,11 +274,11 @@ def to_smiles(mol, backend='default'): return output -def from_inchi(mol, inchistr, backend='try-all', raise_atomtype_exception=True): +def from_inchi(mol, inchistr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert an InChI string `inchistr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting - rdkit (default) and openbabel. + a user-specified backend for conversion, currently supporting 'openbabel-first' (default), rdkit-first, + rdkit, and openbabel. 
""" if inchiutil.INCHI_PREFIX in inchistr: return _read(mol, inchistr, 'inchi', backend, raise_atomtype_exception=raise_atomtype_exception) @@ -325,11 +325,11 @@ def from_smarts(mol, smartsstr, backend='rdkit', raise_atomtype_exception=True): return _read(mol, smartsstr, 'sma', backend, raise_atomtype_exception=raise_atomtype_exception) -def from_smiles(mol, smilesstr, backend='try-all', raise_atomtype_exception=True): +def from_smiles(mol, smilesstr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert a SMILES string `smilesstr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting - rdkit (default) and openbabel. + a user-specified backend for conversion, currently supporting openbabel-first (default), rdkit-first, + rdkit and openbabel. """ return _read(mol, smilesstr, 'smi', backend, raise_atomtype_exception=raise_atomtype_exception) @@ -569,9 +569,9 @@ def _get_backend_list(backend): """ if not isinstance(backend, str): raise ValueError("The backend argument should be a string. " - "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'") + "Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'") backend = backend.strip().lower() - if backend == 'try-all': + if backend == 'openbabel-first': return BACKENDS elif backend == 'rdkit-first': return reversed(BACKENDS) @@ -579,4 +579,4 @@ def _get_backend_list(backend): return [backend] else: raise ValueError("Unrecognized value for backend argument. 
" - "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'") + "Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'") diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index c3a9a9fae4..81d771781d 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -520,11 +520,11 @@ def load_thermo_data(self): self.qm_data = local_context["qmData"] return thermo - def get_augmented_inchi_key(self): + def get_augmented_inchi_key(self, backend='rdkit-first'): """ Returns the augmented InChI from self.molecule """ - return self.molecule.to_augmented_inchi_key() + return self.molecule.to_augmented_inchi_key(backend=backend) def get_mol_file_path_for_calculation(self, attempt): """ diff --git a/rmgpy/species.py b/rmgpy/species.py index de0c3a2d9b..e0ae40a2f0 100644 --- a/rmgpy/species.py +++ b/rmgpy/species.py @@ -740,17 +740,17 @@ def copy(self, deep=False): return other - def get_augmented_inchi(self): + def get_augmented_inchi(self, backend='rdkit-first'): if self.aug_inchi is None: - self.aug_inchi = self.generate_aug_inchi() + self.aug_inchi = self.generate_aug_inchi(backend=backend) return self.aug_inchi - def generate_aug_inchi(self): + def generate_aug_inchi(self, backend='rdkit-first'): candidates = [] self.generate_resonance_structures() for mol in self.molecule: try: - cand = [mol.to_augmented_inchi(), mol] + cand = [mol.to_augmented_inchi(backend=backend), mol] except ValueError: pass # not all resonance structures can be parsed into InChI (e.g. if containing a hypervalance atom) else: diff --git a/scripts/checkModels.py b/scripts/checkModels.py index 2ebd814d1e..7d62b99e83 100644 --- a/scripts/checkModels.py +++ b/scripts/checkModels.py @@ -284,6 +284,18 @@ def initialize_log(verbose, log_file_name='checkModels.log'): `verbose` parameter is an integer specifying the amount of log text seen at the console; the levels correspond to those of the :data:`logging` module. 
""" + # since RDKit 2022.03.1, logging is done using the Python logger instead of the + # Cout streams. This does not affect running RMG normally, but this testing file + # only works properly if it is the only logger + # see https://github.com/rdkit/rdkit/pull/4846 for the changes in RDKit + + # clear all other existing loggers + # https://stackoverflow.com/a/12158233 + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + # once moved to a more recent python (at least 3.8), just add force=true to this statement + # and remove the above logging.basicConfig( filename=log_file_name, filemode='w', diff --git a/test/rmgpy/data/thermoTest.py b/test/rmgpy/data/thermoTest.py index f462810007..7360b6f21e 100644 --- a/test/rmgpy/data/thermoTest.py +++ b/test/rmgpy/data/thermoTest.py @@ -53,7 +53,7 @@ split_bicyclic_into_single_rings, ) from rmgpy.exceptions import DatabaseError -from rmgpy.ml.estimator import MLEstimator +from rmgpy.ml.estimator import MLEstimator, ADMONITION from rmgpy.molecule.molecule import Molecule from rmgpy.quantity import Quantity from rmgpy.species import Species @@ -123,11 +123,11 @@ def setup_class(cls): ) cls.databaseWithoutLibraries.set_binding_energies("Pt111") - # Set up ML estimator - models_path = os.path.join(settings["database.directory"], "thermo", "ml", "main") - hf298_path = os.path.join(models_path, "hf298") - s298_cp_path = os.path.join(models_path, "s298_cp") - cls.ml_estimator = MLEstimator(hf298_path, s298_cp_path) + # Set up ML estimator - temporarily removed, see rmgpy.ml.estimator + # models_path = os.path.join(settings["database.directory"], "thermo", "ml", "main") + # hf298_path = os.path.join(models_path, "hf298") + # s298_cp_path = os.path.join(models_path, "s298_cp") + # cls.ml_estimator = MLEstimator(hf298_path, s298_cp_path) def test_pickle(self): """ @@ -602,6 +602,7 @@ def test_species_thermo_generation_library(self): assert arom.is_isomorphic(spec.molecule[0]) # The aromatic structure 
should now be the first one assert "library" in thermo.comment, "Thermo not found from library, test purpose not fulfilled." + @pytest.mark.skip(reason=ADMONITION) def test_species_thermo_generation_ml(self): """Test thermo generation for species objects based on ML estimation.""" @@ -652,6 +653,7 @@ def test_species_thermo_generation_ml(self): assert thermo1 is None assert thermo2 is None + @pytest.mark.skip(reason=ADMONITION) def test_thermo_generation_ml_settings(self): """Test that thermo generation with ML correctly respects settings""" diff --git a/test/rmgpy/ml/estimatorTest.py b/test/rmgpy/ml/estimatorTest.py index bd2213dc1b..bd3fc9574b 100644 --- a/test/rmgpy/ml/estimatorTest.py +++ b/test/rmgpy/ml/estimatorTest.py @@ -31,9 +31,11 @@ from rmgpy import settings -from rmgpy.ml.estimator import MLEstimator +from rmgpy.ml.estimator import MLEstimator, ADMONITION +import pytest +@pytest.mark.skip(reason=ADMONITION) class TestMLEstimator: """ Contains unit tests for rmgpy.ml.estimator diff --git a/test/rmgpy/molecule/translatorTest.py b/test/rmgpy/molecule/translatorTest.py index 70cd62619d..cb62d9dc97 100644 --- a/test/rmgpy/molecule/translatorTest.py +++ b/test/rmgpy/molecule/translatorTest.py @@ -57,7 +57,8 @@ def test_empty_molecule(self): assert mol.to_smiles() == "" assert mol.to_inchi() == "" - @patch("rmgpy.molecule.translator.logging") + @pytest.mark.skip(reason='This unit test checks for a bug which has been ' + 'patched in version of RDKit >= 2022.9.1.') def test_failure_message(self, mock_logging): """Test that we log the molecule adjlist upon failure.""" mol = Molecule(smiles="[CH2-][N+]#N") @@ -238,7 +239,7 @@ def test_ch2o2(self): 3 O 1 {1,S} """ - aug_inchi = "InChI=1/CH2O2/c2-1-3/h1H,(H,2,3)/u1,2" + aug_inchi = "InChI=1/CH2O2/c2-1-3/h1-2H/u1,3" self.compare(adjlist, aug_inchi) def test_c7h10(self): @@ -1411,8 +1412,8 @@ def test_c3h3o3(self): self.compare(inchi, u_indices) def test_ch2o2(self): - inchi = "CH2O2/c2-1-3/h1H,(H,2,3)" - u_indices = 
[1, 2] + inchi = "CH2O2/c2-1-3/h1-2H" + u_indices = [1, 3] self.compare(inchi, u_indices) def test_c2h2o3(self): diff --git a/test/rmgpy/rmg/inputTest.py b/test/rmgpy/rmg/inputTest.py index ef69ab6ade..95f53addea 100644 --- a/test/rmgpy/rmg/inputTest.py +++ b/test/rmgpy/rmg/inputTest.py @@ -31,6 +31,7 @@ import rmgpy.rmg.input as inp from rmgpy.rmg.main import RMG +from rmgpy.ml.estimator import ADMONITION import pytest @@ -92,7 +93,7 @@ def test_importing_database_reaction_libraries_from_true_tuple(self): assert isinstance(rmg.reaction_libraries[0], tuple) assert rmg.reaction_libraries[0][1] - +@pytest.mark.skip(reason=ADMONITION) class TestInputMLEstimator: """ Contains unit tests rmgpy.rmg.input.mlEstimator