diff --git a/.conda/meta.yaml b/.conda/meta.yaml
index b1df7678e6..7cd02969fe 100644
--- a/.conda/meta.yaml
+++ b/.conda/meta.yaml
@@ -30,7 +30,6 @@ requirements:
- cairocffi
- cantera >=2.3.0
- cclib >=1.6.3
- - chemprop
- coolprop
- coverage
- cython >=0.25.2
diff --git a/arkane/common.py b/arkane/common.py
index 5227298895..a706417b88 100644
--- a/arkane/common.py
+++ b/arkane/common.py
@@ -191,11 +191,11 @@ def update_species_attributes(self, species=None):
self.multiplicity = species.molecule[0].multiplicity
self.formula = species.molecule[0].get_formula()
try:
- inchi = to_inchi(species.molecule[0], backend='try-all', aug_level=0)
+ inchi = to_inchi(species.molecule[0], backend='openbabel-first', aug_level=0)
except ValueError:
inchi = ''
try:
- inchi_key = to_inchi_key(species.molecule[0], backend='try-all', aug_level=0)
+ inchi_key = to_inchi_key(species.molecule[0], backend='openbabel-first', aug_level=0)
except ValueError:
inchi_key = ''
self.inchi = inchi
diff --git a/environment.yml b/environment.yml
index 4596f580ee..97b22265fe 100644
--- a/environment.yml
+++ b/environment.yml
@@ -14,6 +14,8 @@
# Changelog:
# - May 15, 2023 Added this changelog, added inline documentation,
# made dependency list more explicit (@JacksonBurns).
+# - October 16, 2023 Switched RDKit and descriptastorus to conda-forge,
+# moved diffeqpy to pip and (temporarily) removed chemprop
#
name: rmg_env
channels:
@@ -44,6 +46,7 @@ dependencies:
- conda-forge::mopac
- conda-forge::cclib >=1.6.3,!=1.8.0
- conda-forge::openbabel >= 3
+ - conda-forge::rdkit >=2022.09.1
# general-purpose external software tools
- conda-forge::julia=1.9.1
@@ -54,7 +57,7 @@ dependencies:
- coverage
- cython >=0.25.2
- scikit-learn
- - scipy
+ - scipy <1.11
- numpy >=1.10.0
- pydot
- jinja2
@@ -94,14 +97,8 @@ dependencies:
# rather than ours (which is only made so that we can get it from conda)
# It is only on pip, so we will need to do something like:
# https://stackoverflow.com/a/35245610
-
- - rmg::chemprop
- # Our build of this is version 0.0.1 (!!) and we are using parts
- # of the API that are now gone. Need a serious PR to fix this.
-
- - rmg::rdkit >=2020.03.3.0
- # We should use the official channel, not sure how difficult this
- # change will be.
+ # Note that _some other_ dependency in this list requires diffeqpy in its recipe,
+ # which will cause diffeqpy to be downloaded from the rmg conda channel.
# conda mutex metapackage
- nomkl
diff --git a/rmgpy/ml/estimator.py b/rmgpy/ml/estimator.py
index 84cafb651c..81e1d9e701 100644
--- a/rmgpy/ml/estimator.py
+++ b/rmgpy/ml/estimator.py
@@ -32,10 +32,10 @@
from argparse import Namespace
from typing import Callable, Union
+chemprop = None
try:
import chemprop
except ImportError as e:
- chemprop = None
chemprop_exception = e
import numpy as np
@@ -43,6 +43,16 @@
from rmgpy.species import Species
from rmgpy.thermo import ThermoData
+ADMONITION = """
+Support for predicting thermochemistry using chemprop has been temporarily removed
+from RMG, pending official chemprop support for Python 3.11 and newer.
+
+To use chemprop and RMG, install a previous version of RMG (3.1.1 or earlier).
+
+See the link below for status of re-integration of chemprop:
+https://github.com/ReactionMechanismGenerator/RMG-Py/issues/2559
+"""
+
class MLEstimator:
"""
@@ -118,7 +128,7 @@ def load_estimator(model_dir: str) -> Callable[[str], np.ndarray]:
if chemprop is None:
# Delay chemprop ImportError until we actually try to use it
# so that RMG can load successfully without chemprop.
- raise chemprop_exception
+ raise RuntimeError(ADMONITION + "\nOriginal Exception:\n" + str(chemprop_exception))
args = Namespace() # Simple class to hold attributes
diff --git a/rmgpy/molecule/molecule.pxd b/rmgpy/molecule/molecule.pxd
index 4590bbed6b..f227e5d533 100644
--- a/rmgpy/molecule/molecule.pxd
+++ b/rmgpy/molecule/molecule.pxd
@@ -224,14 +224,14 @@ cdef class Molecule(Graph):
bint raise_charge_exception=?, bint check_consistency=?)
cpdef from_xyz(self, np.ndarray atomic_nums, np.ndarray coordinates, float critical_distance_factor=?, bint raise_atomtype_exception=?)
-
- cpdef str to_inchi(self)
- cpdef str to_augmented_inchi(self)
+ cpdef str to_inchi(self, str backend=?)
+
+ cpdef str to_augmented_inchi(self, str backend=?)
- cpdef str to_inchi_key(self)
+ cpdef str to_inchi_key(self, str backend=?)
- cpdef str to_augmented_inchi_key(self)
+ cpdef str to_augmented_inchi_key(self, str backend=?)
cpdef str to_smiles(self)
diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py
index e93f0092eb..21e5007c4d 100644
--- a/rmgpy/molecule/molecule.py
+++ b/rmgpy/molecule/molecule.py
@@ -1768,9 +1768,13 @@ def _repr_png_(self):
os.unlink(temp_file_name)
return png
- def from_inchi(self, inchistr, backend='try-all', raise_atomtype_exception=True):
+ def from_inchi(self, inchistr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert an InChI string `inchistr` to a molecular structure.
+
+ RDKit and Open Babel are the two backends used in RMG. It is possible to use a
+ single backend or try different backends in sequence. The available options for the ``backend``
+ argument are 'openbabel-first' (default), 'rdkit-first', 'rdkit', and 'openbabel'.
"""
translator.from_inchi(self, inchistr, backend, raise_atomtype_exception=raise_atomtype_exception)
return self
@@ -1782,9 +1786,13 @@ def from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=True):
translator.from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=raise_atomtype_exception)
return self
- def from_smiles(self, smilesstr, backend='try-all', raise_atomtype_exception=True):
+ def from_smiles(self, smilesstr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert a SMILES string `smilesstr` to a molecular structure.
+
+ RDKit and Open Babel are the two backends used in RMG. It is possible to use a
+ single backend or try different backends in sequence. The available options for the ``backend``
+ argument are 'openbabel-first' (default), 'rdkit-first', 'rdkit', and 'openbabel'.
"""
translator.from_smiles(self, smilesstr, backend, raise_atomtype_exception=raise_atomtype_exception)
return self
@@ -1863,62 +1871,78 @@ def to_single_bonds(self, raise_atomtype_exception=True):
new_mol.update_atomtypes(raise_exception=raise_atomtype_exception)
return new_mol
- def to_inchi(self):
+ def to_inchi(self, backend='rdkit-first'):
"""
Convert a molecular structure to an InChI string. Uses
`RDKit `_ to perform the conversion.
Perceives aromaticity.
-
+
or
-
+
Convert a molecular structure to an InChI string. Uses
`OpenBabel `_ to perform the conversion.
+
+ It is possible to use a single backend or try different backends in sequence.
+ The available options for the ``backend`` argument are 'rdkit-first' (default),
+ 'openbabel-first', 'rdkit', and 'openbabel'.
"""
try:
- return translator.to_inchi(self)
+ return translator.to_inchi(self, backend=backend)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise
- def to_augmented_inchi(self):
+ def to_augmented_inchi(self, backend='rdkit-first'):
"""
Adds an extra layer to the InChI denoting the multiplicity
of the molecule.
-
+
Separate layer with a forward slash character.
+
+ RDKit and Open Babel are the two backends used in RMG. It is possible to use a
+ single backend or try different backends in sequence. The available options for the ``backend``
+ argument are 'rdkit-first' (default), 'openbabel-first', 'rdkit', and 'openbabel'.
"""
try:
- return translator.to_inchi(self, aug_level=2)
+ return translator.to_inchi(self, backend=backend, aug_level=2)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise
- def to_inchi_key(self):
+ def to_inchi_key(self, backend='rdkit-first'):
"""
Convert a molecular structure to an InChI Key string. Uses
`OpenBabel `_ to perform the conversion.
-
- or
-
+
+ or
+
Convert a molecular structure to an InChI Key string. Uses
`RDKit `_ to perform the conversion.
+
+ It is possible to use a single backend or try different backends in sequence.
+ The available options for the ``backend`` argument are 'rdkit-first' (default),
+ 'openbabel-first', 'rdkit', and 'openbabel'.
"""
try:
- return translator.to_inchi_key(self)
+ return translator.to_inchi_key(self, backend=backend)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise
- def to_augmented_inchi_key(self):
+ def to_augmented_inchi_key(self, backend='rdkit-first'):
"""
Adds an extra layer to the InChIKey denoting the multiplicity
of the molecule.
Simply append the multiplicity string, do not separate by a
character like forward slash.
+
+ RDKit and Open Babel are the two backends used in RMG. It is possible to use a
+ single backend or try different backends in sequence. The available options for the ``backend``
+ argument are 'rdkit-first' (default), 'openbabel-first', 'rdkit', and 'openbabel'.
"""
try:
- return translator.to_inchi_key(self, aug_level=2)
+ return translator.to_inchi_key(self, backend=backend, aug_level=2)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise
diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py
index 1fd0cd2220..731d8d8d8e 100644
--- a/rmgpy/molecule/translator.py
+++ b/rmgpy/molecule/translator.py
@@ -169,7 +169,7 @@ def to_inchi(mol, backend='rdkit-first', aug_level=0):
Uses RDKit or OpenBabel for conversion.
Args:
- backend choice of backend, 'try-all', 'rdkit', or 'openbabel'
+ backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel'
aug_level level of augmentation, 0, 1, or 2
"""
cython.declare(inchi=str, ulayer=str, player=str, mlayer=str)
@@ -205,7 +205,7 @@ def to_inchi_key(mol, backend='rdkit-first', aug_level=0):
Uses RDKit or OpenBabel for conversion.
Args:
- backend choice of backend, 'try-all', 'rdkit', or 'openbabel'
+ backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel'
aug_level level of augmentation, 0, 1, or 2
"""
cython.declare(key=str, ulayer=str, player=str, mlayer=str)
@@ -274,11 +274,11 @@ def to_smiles(mol, backend='default'):
return output
-def from_inchi(mol, inchistr, backend='try-all', raise_atomtype_exception=True):
+def from_inchi(mol, inchistr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert an InChI string `inchistr` to a molecular structure. Uses
- a user-specified backend for conversion, currently supporting
- rdkit (default) and openbabel.
+ a user-specified backend for conversion, currently supporting 'openbabel-first' (default), 'rdkit-first',
+ 'rdkit', and 'openbabel'.
"""
if inchiutil.INCHI_PREFIX in inchistr:
return _read(mol, inchistr, 'inchi', backend, raise_atomtype_exception=raise_atomtype_exception)
@@ -325,11 +325,11 @@ def from_smarts(mol, smartsstr, backend='rdkit', raise_atomtype_exception=True):
return _read(mol, smartsstr, 'sma', backend, raise_atomtype_exception=raise_atomtype_exception)
-def from_smiles(mol, smilesstr, backend='try-all', raise_atomtype_exception=True):
+def from_smiles(mol, smilesstr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert a SMILES string `smilesstr` to a molecular structure. Uses
- a user-specified backend for conversion, currently supporting
- rdkit (default) and openbabel.
+ a user-specified backend for conversion, currently supporting 'openbabel-first' (default), 'rdkit-first',
+ 'rdkit', and 'openbabel'.
"""
return _read(mol, smilesstr, 'smi', backend, raise_atomtype_exception=raise_atomtype_exception)
@@ -569,9 +569,9 @@ def _get_backend_list(backend):
"""
if not isinstance(backend, str):
raise ValueError("The backend argument should be a string. "
- "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'")
+ "Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'")
backend = backend.strip().lower()
- if backend == 'try-all':
+ if backend == 'openbabel-first':
return BACKENDS
elif backend == 'rdkit-first':
return reversed(BACKENDS)
@@ -579,4 +579,4 @@ def _get_backend_list(backend):
return [backend]
else:
raise ValueError("Unrecognized value for backend argument. "
- "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'")
+ "Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'")
diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py
index c3a9a9fae4..81d771781d 100644
--- a/rmgpy/qm/molecule.py
+++ b/rmgpy/qm/molecule.py
@@ -520,11 +520,11 @@ def load_thermo_data(self):
self.qm_data = local_context["qmData"]
return thermo
- def get_augmented_inchi_key(self):
+ def get_augmented_inchi_key(self, backend='rdkit-first'):
"""
Returns the augmented InChI from self.molecule
"""
- return self.molecule.to_augmented_inchi_key()
+ return self.molecule.to_augmented_inchi_key(backend=backend)
def get_mol_file_path_for_calculation(self, attempt):
"""
diff --git a/rmgpy/species.py b/rmgpy/species.py
index de0c3a2d9b..e0ae40a2f0 100644
--- a/rmgpy/species.py
+++ b/rmgpy/species.py
@@ -740,17 +740,17 @@ def copy(self, deep=False):
return other
- def get_augmented_inchi(self):
+ def get_augmented_inchi(self, backend='rdkit-first'):
if self.aug_inchi is None:
- self.aug_inchi = self.generate_aug_inchi()
+ self.aug_inchi = self.generate_aug_inchi(backend=backend)
return self.aug_inchi
- def generate_aug_inchi(self):
+ def generate_aug_inchi(self, backend='rdkit-first'):
candidates = []
self.generate_resonance_structures()
for mol in self.molecule:
try:
- cand = [mol.to_augmented_inchi(), mol]
+ cand = [mol.to_augmented_inchi(backend=backend), mol]
except ValueError:
pass # not all resonance structures can be parsed into InChI (e.g. if containing a hypervalance atom)
else:
diff --git a/scripts/checkModels.py b/scripts/checkModels.py
index 2ebd814d1e..7d62b99e83 100644
--- a/scripts/checkModels.py
+++ b/scripts/checkModels.py
@@ -284,6 +284,18 @@ def initialize_log(verbose, log_file_name='checkModels.log'):
`verbose` parameter is an integer specifying the amount of log text seen
at the console; the levels correspond to those of the :data:`logging` module.
"""
+ # since RDKit 2022.03.1, logging is done using the Python logger instead of the
+ # Cout streams. This does not affect running RMG normally, but this testing file
+ # only works properly if it is the only logger
+ # see https://github.com/rdkit/rdkit/pull/4846 for the changes in RDKit
+
+ # remove all handlers already attached to the root logger
+ # https://stackoverflow.com/a/12158233
+ for handler in logging.root.handlers[:]:
+ logging.root.removeHandler(handler)
+
+ # once on a more recent Python (at least 3.8), just pass force=True to this call
+ # and remove the handler-removal loop above
logging.basicConfig(
filename=log_file_name,
filemode='w',
diff --git a/test/rmgpy/data/thermoTest.py b/test/rmgpy/data/thermoTest.py
index f462810007..7360b6f21e 100644
--- a/test/rmgpy/data/thermoTest.py
+++ b/test/rmgpy/data/thermoTest.py
@@ -53,7 +53,7 @@
split_bicyclic_into_single_rings,
)
from rmgpy.exceptions import DatabaseError
-from rmgpy.ml.estimator import MLEstimator
+from rmgpy.ml.estimator import MLEstimator, ADMONITION
from rmgpy.molecule.molecule import Molecule
from rmgpy.quantity import Quantity
from rmgpy.species import Species
@@ -123,11 +123,11 @@ def setup_class(cls):
)
cls.databaseWithoutLibraries.set_binding_energies("Pt111")
- # Set up ML estimator
- models_path = os.path.join(settings["database.directory"], "thermo", "ml", "main")
- hf298_path = os.path.join(models_path, "hf298")
- s298_cp_path = os.path.join(models_path, "s298_cp")
- cls.ml_estimator = MLEstimator(hf298_path, s298_cp_path)
+ # Set up ML estimator - temporarily removed, see rmgpy.ml.estimator
+ # models_path = os.path.join(settings["database.directory"], "thermo", "ml", "main")
+ # hf298_path = os.path.join(models_path, "hf298")
+ # s298_cp_path = os.path.join(models_path, "s298_cp")
+ # cls.ml_estimator = MLEstimator(hf298_path, s298_cp_path)
def test_pickle(self):
"""
@@ -602,6 +602,7 @@ def test_species_thermo_generation_library(self):
assert arom.is_isomorphic(spec.molecule[0]) # The aromatic structure should now be the first one
assert "library" in thermo.comment, "Thermo not found from library, test purpose not fulfilled."
+ @pytest.mark.skip(reason=ADMONITION)
def test_species_thermo_generation_ml(self):
"""Test thermo generation for species objects based on ML estimation."""
@@ -652,6 +653,7 @@ def test_species_thermo_generation_ml(self):
assert thermo1 is None
assert thermo2 is None
+ @pytest.mark.skip(reason=ADMONITION)
def test_thermo_generation_ml_settings(self):
"""Test that thermo generation with ML correctly respects settings"""
diff --git a/test/rmgpy/ml/estimatorTest.py b/test/rmgpy/ml/estimatorTest.py
index bd2213dc1b..bd3fc9574b 100644
--- a/test/rmgpy/ml/estimatorTest.py
+++ b/test/rmgpy/ml/estimatorTest.py
@@ -31,9 +31,11 @@
from rmgpy import settings
-from rmgpy.ml.estimator import MLEstimator
+from rmgpy.ml.estimator import MLEstimator, ADMONITION
+import pytest
+@pytest.mark.skip(reason=ADMONITION)
class TestMLEstimator:
"""
Contains unit tests for rmgpy.ml.estimator
diff --git a/test/rmgpy/molecule/translatorTest.py b/test/rmgpy/molecule/translatorTest.py
index 70cd62619d..cb62d9dc97 100644
--- a/test/rmgpy/molecule/translatorTest.py
+++ b/test/rmgpy/molecule/translatorTest.py
@@ -57,7 +57,8 @@ def test_empty_molecule(self):
assert mol.to_smiles() == ""
assert mol.to_inchi() == ""
- @patch("rmgpy.molecule.translator.logging")
+ @pytest.mark.skip(reason='This unit test checks for a bug which has been '
+ 'patched in version of RDKit >= 2022.9.1.')
def test_failure_message(self, mock_logging):
"""Test that we log the molecule adjlist upon failure."""
mol = Molecule(smiles="[CH2-][N+]#N")
@@ -238,7 +239,7 @@ def test_ch2o2(self):
3 O 1 {1,S}
"""
- aug_inchi = "InChI=1/CH2O2/c2-1-3/h1H,(H,2,3)/u1,2"
+ aug_inchi = "InChI=1/CH2O2/c2-1-3/h1-2H/u1,3"
self.compare(adjlist, aug_inchi)
def test_c7h10(self):
@@ -1411,8 +1412,8 @@ def test_c3h3o3(self):
self.compare(inchi, u_indices)
def test_ch2o2(self):
- inchi = "CH2O2/c2-1-3/h1H,(H,2,3)"
- u_indices = [1, 2]
+ inchi = "CH2O2/c2-1-3/h1-2H"
+ u_indices = [1, 3]
self.compare(inchi, u_indices)
def test_c2h2o3(self):
diff --git a/test/rmgpy/rmg/inputTest.py b/test/rmgpy/rmg/inputTest.py
index ef69ab6ade..95f53addea 100644
--- a/test/rmgpy/rmg/inputTest.py
+++ b/test/rmgpy/rmg/inputTest.py
@@ -31,6 +31,7 @@
import rmgpy.rmg.input as inp
from rmgpy.rmg.main import RMG
+from rmgpy.ml.estimator import ADMONITION
import pytest
@@ -92,7 +93,7 @@ def test_importing_database_reaction_libraries_from_true_tuple(self):
assert isinstance(rmg.reaction_libraries[0], tuple)
assert rmg.reaction_libraries[0][1]
-
+@pytest.mark.skip(reason=ADMONITION)
class TestInputMLEstimator:
"""
Contains unit tests rmgpy.rmg.input.mlEstimator