Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ requirements:
- cairocffi
- cantera >=2.3.0
- cclib >=1.6.3
- chemprop
- coolprop
- coverage
- cython >=0.25.2
Expand Down
4 changes: 2 additions & 2 deletions arkane/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,11 @@ def update_species_attributes(self, species=None):
self.multiplicity = species.molecule[0].multiplicity
self.formula = species.molecule[0].get_formula()
try:
inchi = to_inchi(species.molecule[0], backend='try-all', aug_level=0)
inchi = to_inchi(species.molecule[0], backend='openbabel-first', aug_level=0)
except ValueError:
inchi = ''
try:
inchi_key = to_inchi_key(species.molecule[0], backend='try-all', aug_level=0)
inchi_key = to_inchi_key(species.molecule[0], backend='openbabel-first', aug_level=0)
except ValueError:
inchi_key = ''
self.inchi = inchi
Expand Down
15 changes: 6 additions & 9 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# Changelog:
# - May 15, 2023 Added this changelog, added inline documentation,
# made dependency list more explicit (@JacksonBurns).
# - October 16, 2023 Switched RDKit and descriptastorus to conda-forge,
# moved diffeqpy to pip and (temporarily) removed chemprop
#
name: rmg_env
channels:
Expand Down Expand Up @@ -44,6 +46,7 @@ dependencies:
- conda-forge::mopac
- conda-forge::cclib >=1.6.3,!=1.8.0
- conda-forge::openbabel >= 3
- conda-forge::rdkit >=2022.09.1

# general-purpose external software tools
- conda-forge::julia=1.9.1
Expand All @@ -54,7 +57,7 @@ dependencies:
- coverage
- cython >=0.25.2
- scikit-learn
- scipy
- scipy <1.11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we add this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this. I believe I originally added this because of descriptastorus (which was incompatible with later versions of scipy) but it might not be required. I will remove it and we can see what happens in the CI.

- numpy >=1.10.0
- pydot
- jinja2
Expand Down Expand Up @@ -94,14 +97,8 @@ dependencies:
# rather than ours (which is only made so that we can get it from conda)
# It is only on pip, so we will need to do something like:
# https://stackoverflow.com/a/35245610

- rmg::chemprop
# Our build of this is version 0.0.1 (!!) and we are using parts
# of the API that are now gone. Need a serious PR to fix this.

- rmg::rdkit >=2020.03.3.0
# We should use the official channel, not sure how difficult this
# change will be.
# Note that _some other_ dep. in this list requires diffeqpy in its recipe
# which will cause it to be downloaded from the rmg conda channel

# conda mutex metapackage
- nomkl
Expand Down
14 changes: 12 additions & 2 deletions rmgpy/ml/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,27 @@
from argparse import Namespace
from typing import Callable, Union

chemprop = None
try:
import chemprop
except ImportError as e:
chemprop = None
chemprop_exception = e
import numpy as np

from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy.thermo import ThermoData

ADMONITION = """
Support for predicting thermochemistry using chemprop has been temporarily removed
from RMG, pending official chemprop support for Python 3.11 and newer.

To use chemprop and RMG, install a previous version of RMG (3.1.1 or earlier).

See the link below for status of re-integration of chemprop:
https://github.com/ReactionMechanismGenerator/RMG-Py/issues/2559
"""


class MLEstimator:
"""
Expand Down Expand Up @@ -118,7 +128,7 @@ def load_estimator(model_dir: str) -> Callable[[str], np.ndarray]:
if chemprop is None:
# Delay chemprop ImportError until we actually try to use it
# so that RMG can load successfully without chemprop.
raise chemprop_exception
raise RuntimeError(ADMONITION + "\nOriginal Exception:\n" + str(chemprop_exception))

args = Namespace() # Simple class to hold attributes

Expand Down
10 changes: 5 additions & 5 deletions rmgpy/molecule/molecule.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -224,14 +224,14 @@ cdef class Molecule(Graph):
bint raise_charge_exception=?, bint check_consistency=?)

cpdef from_xyz(self, np.ndarray atomic_nums, np.ndarray coordinates, float critical_distance_factor=?, bint raise_atomtype_exception=?)

cpdef str to_inchi(self)

cpdef str to_augmented_inchi(self)
cpdef str to_inchi(self, str backend=?)

cpdef str to_augmented_inchi(self, str backend=?)

cpdef str to_inchi_key(self)
cpdef str to_inchi_key(self, str backend=?)

cpdef str to_augmented_inchi_key(self)
cpdef str to_augmented_inchi_key(self, str backend=?)

cpdef str to_smiles(self)

Expand Down
56 changes: 40 additions & 16 deletions rmgpy/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -1768,9 +1768,13 @@ def _repr_png_(self):
os.unlink(temp_file_name)
return png

def from_inchi(self, inchistr, backend='try-all', raise_atomtype_exception=True):
def from_inchi(self, inchistr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert an InChI string `inchistr` to a molecular structure.

RDKit and Open Babel are the two backends used in RMG. It is possible to use a
single backend or try different backends in sequence. The available options for the ``backend``
argument: 'openbabel-first'(default), 'rdkit-first', 'rdkit', or 'openbabel'.
"""
translator.from_inchi(self, inchistr, backend, raise_atomtype_exception=raise_atomtype_exception)
return self
Expand All @@ -1782,9 +1786,13 @@ def from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=True):
translator.from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=raise_atomtype_exception)
return self

def from_smiles(self, smilesstr, backend='try-all', raise_atomtype_exception=True):
def from_smiles(self, smilesstr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert a SMILES string `smilesstr` to a molecular structure.

RDKit and Open Babel are the two backends used in RMG. It is possible to use a
single backend or try different backends in sequence. The available options for the ``backend``
argument: 'openbabel-first'(default), 'rdkit-first', 'rdkit', or 'openbabel'.
"""
translator.from_smiles(self, smilesstr, backend, raise_atomtype_exception=raise_atomtype_exception)
return self
Expand Down Expand Up @@ -1863,62 +1871,78 @@ def to_single_bonds(self, raise_atomtype_exception=True):
new_mol.update_atomtypes(raise_exception=raise_atomtype_exception)
return new_mol

def to_inchi(self):
def to_inchi(self, backend='rdkit-first'):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels kind of awkward that the default arguments are duplicated both here in molecule.py as well as translator.py. If one default value changes, it doesn't guarantee that the other will mirror that change.

"""
Convert a molecular structure to an InChI string. Uses
`RDKit <http://rdkit.org/>`_ to perform the conversion.
Perceives aromaticity.

or

Convert a molecular structure to an InChI string. Uses
`OpenBabel <http://openbabel.org/>`_ to perform the conversion.

It is possible to use a single backend or try different backends in sequence.
The available options for the ``backend`` argument: 'rdkit-first'(default),
'openbabel-first', 'rdkit', or 'openbabel'.
"""
try:
return translator.to_inchi(self)
return translator.to_inchi(self, backend=backend)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise

def to_augmented_inchi(self):
def to_augmented_inchi(self, backend='rdkit-first'):
"""
Adds an extra layer to the InChI denoting the multiplicity
of the molecule.

Separate layer with a forward slash character.

RDKit and Open Babel are the two backends used in RMG. It is possible to use a
single backend or try different backends in sequence. The available options for the ``backend``
argument: 'rdkit-first'(default), 'openbabel-first', 'rdkit', or 'openbabel'.
"""
try:
return translator.to_inchi(self, aug_level=2)
return translator.to_inchi(self, backend=backend, aug_level=2)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise

def to_inchi_key(self):
def to_inchi_key(self, backend='rdkit-first'):
"""
Convert a molecular structure to an InChI Key string. Uses
`OpenBabel <http://openbabel.org/>`_ to perform the conversion.
or

or

Convert a molecular structure to an InChI Key string. Uses
`RDKit <http://rdkit.org/>`_ to perform the conversion.

It is possible to use a single backend or try different backends in sequence.
The available options for the ``backend`` argument: 'rdkit-first'(default),
'openbabel-first', 'rdkit', or 'openbabel'.
"""
try:
return translator.to_inchi_key(self)
return translator.to_inchi_key(self, backend=backend)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise

def to_augmented_inchi_key(self):
def to_augmented_inchi_key(self, backend='rdkit-first'):
"""
Adds an extra layer to the InChIKey denoting the multiplicity
of the molecule.

Simply append the multiplicity string, do not separate by a
character like forward slash.

RDKit and Open Babel are the two backends used in RMG. It is possible to use a
single backend or try different backends in sequence. The available options for the ``backend``
argument: 'rdkit-first'(default), 'openbabel-first', 'rdkit', or 'openbabel'.
"""
try:
return translator.to_inchi_key(self, aug_level=2)
return translator.to_inchi_key(self, backend=backend, aug_level=2)
except:
logging.exception(f"Error for molecule \n{self.to_adjacency_list()}")
raise
Expand Down
22 changes: 11 additions & 11 deletions rmgpy/molecule/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def to_inchi(mol, backend='rdkit-first', aug_level=0):
Uses RDKit or OpenBabel for conversion.

Args:
backend choice of backend, 'try-all', 'rdkit', or 'openbabel'
backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel'
aug_level level of augmentation, 0, 1, or 2
"""
cython.declare(inchi=str, ulayer=str, player=str, mlayer=str)
Expand Down Expand Up @@ -205,7 +205,7 @@ def to_inchi_key(mol, backend='rdkit-first', aug_level=0):
Uses RDKit or OpenBabel for conversion.

Args:
backend choice of backend, 'try-all', 'rdkit', or 'openbabel'
backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel'
aug_level level of augmentation, 0, 1, or 2
"""
cython.declare(key=str, ulayer=str, player=str, mlayer=str)
Expand Down Expand Up @@ -274,11 +274,11 @@ def to_smiles(mol, backend='default'):
return output


def from_inchi(mol, inchistr, backend='try-all', raise_atomtype_exception=True):
def from_inchi(mol, inchistr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert an InChI string `inchistr` to a molecular structure. Uses
a user-specified backend for conversion, currently supporting
rdkit (default) and openbabel.
a user-specified backend for conversion, currently supporting 'openbabel-first' (default), rdkit-first,
rdkit, and openbabel.
"""
if inchiutil.INCHI_PREFIX in inchistr:
return _read(mol, inchistr, 'inchi', backend, raise_atomtype_exception=raise_atomtype_exception)
Expand Down Expand Up @@ -325,11 +325,11 @@ def from_smarts(mol, smartsstr, backend='rdkit', raise_atomtype_exception=True):
return _read(mol, smartsstr, 'sma', backend, raise_atomtype_exception=raise_atomtype_exception)


def from_smiles(mol, smilesstr, backend='try-all', raise_atomtype_exception=True):
def from_smiles(mol, smilesstr, backend='openbabel-first', raise_atomtype_exception=True):
"""
Convert a SMILES string `smilesstr` to a molecular structure. Uses
a user-specified backend for conversion, currently supporting
rdkit (default) and openbabel.
a user-specified backend for conversion, currently supporting openbabel-first (default), rdkit-first,
rdkit and openbabel.
"""
return _read(mol, smilesstr, 'smi', backend, raise_atomtype_exception=raise_atomtype_exception)

Expand Down Expand Up @@ -569,14 +569,14 @@ def _get_backend_list(backend):
"""
if not isinstance(backend, str):
raise ValueError("The backend argument should be a string. "
"Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'")
"Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'")
backend = backend.strip().lower()
if backend == 'try-all':
if backend == 'openbabel-first':
return BACKENDS
elif backend == 'rdkit-first':
return reversed(BACKENDS)
elif backend in ['rdkit', 'openbabel']:
return [backend]
else:
raise ValueError("Unrecognized value for backend argument. "
"Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'")
"Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'")
4 changes: 2 additions & 2 deletions rmgpy/qm/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,11 +520,11 @@ def load_thermo_data(self):
self.qm_data = local_context["qmData"]
return thermo

def get_augmented_inchi_key(self):
def get_augmented_inchi_key(self, backend='rdkit-first'):
"""
Returns the augmented InChI key from self.molecule
"""
return self.molecule.to_augmented_inchi_key()
return self.molecule.to_augmented_inchi_key(backend=backend)

def get_mol_file_path_for_calculation(self, attempt):
"""
Expand Down
8 changes: 4 additions & 4 deletions rmgpy/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,17 +740,17 @@ def copy(self, deep=False):

return other

def get_augmented_inchi(self):
def get_augmented_inchi(self, backend='rdkit-first'):
if self.aug_inchi is None:
self.aug_inchi = self.generate_aug_inchi()
self.aug_inchi = self.generate_aug_inchi(backend=backend)
return self.aug_inchi

def generate_aug_inchi(self):
def generate_aug_inchi(self, backend='rdkit-first'):
candidates = []
self.generate_resonance_structures()
for mol in self.molecule:
try:
cand = [mol.to_augmented_inchi(), mol]
cand = [mol.to_augmented_inchi(backend=backend), mol]
except ValueError:
pass # not all resonance structures can be parsed into InChI (e.g. if containing a hypervalent atom)
else:
Expand Down
12 changes: 12 additions & 0 deletions scripts/checkModels.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,18 @@ def initialize_log(verbose, log_file_name='checkModels.log'):
`verbose` parameter is an integer specifying the amount of log text seen
at the console; the levels correspond to those of the :data:`logging` module.
"""
# since RDKit 2022.03.1, logging is done using the Python logger instead of the
# Cout streams. This does not affect running RMG normally, but this testing file
# only works properly if it is the only logger
# see https://github.com/rdkit/rdkit/pull/4846 for the changes in RDKit

# clear all other existing loggers
# https://stackoverflow.com/a/12158233
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)

# once moved to a more recent Python (at least 3.8), just add force=True to this statement
# and remove the handler-clearing loop above
logging.basicConfig(
filename=log_file_name,
filemode='w',
Expand Down
Loading