From 4a548c7a34aea58404b565bf8a6d795a69d639b1 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 4 Mar 2024 23:37:03 -0500 Subject: [PATCH 1/5] Refactoring clar optimization and utilize scipy MILP 1. Replace lpsolve APIs with the scipy milp APIs. The new implementation potentially have a slightly better performance (due to vectorization, less data transfer, comparable solver performance, etc.) and improved readability. 2. Decouple the MILP solving step (as _solve_clar_milp ) from the MILP formulation step. The motivation is to avoid unnecessary computation. The original approach includes molecule analysis (specifically `get_aromatic_rings`) into the recursive calls. However, this is not necessary, as molecules are just copied and not modified at all. Therefore analyzing once is enough. --- rmgpy/molecule/resonance.py | 172 ++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 76 deletions(-) diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index 89b0ab3d78c..bdab0999bea 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -53,11 +53,14 @@ import logging +import numpy as np +from scipy.optimize import Bounds, LinearConstraint, milp + import cython import rmgpy.molecule.filtration as filtration import rmgpy.molecule.pathfinder as pathfinder -from rmgpy.exceptions import ILPSolutionError, KekulizationError, AtomTypeError, ResonanceError +from rmgpy.exceptions import KekulizationError, AtomTypeError, ResonanceError from rmgpy.molecule.adjlist import Saturator from rmgpy.molecule.graph import Vertex from rmgpy.molecule.kekulize import kekulize @@ -904,8 +907,7 @@ def generate_clar_structures(mol, save_order=False): Returns a list of :class:`Molecule` objects corresponding to the Clar structures. """ - cython.declare(output=list, mol_list=list, new_mol=Graph, aromatic_rings=list, bonds=list, solution=list, - y=list, x=list, index=cython.int, bond=Edge, ring=list) + cython.declare(output=list, mol_list=list, new_mol=Graph, aromatic_rings=list, bonds=list, index=cython.int, bond=Edge, ring=list) if not mol.is_cyclic(): return [] @@ -915,19 +917,20 @@ def generate_clar_structures(mol, save_order=False): mol.assign_atom_ids() try: - output = _clar_optimization(mol, save_order=save_order) - except ILPSolutionError: + aromatic_rings, bonds, solutions = _clar_optimization(mol, save_order=save_order) + except RuntimeError: # The optimization algorithm did not work on the first iteration return [] mol_list = [] - for new_mol, aromatic_rings, bonds, solution in output: + for solution in solutions: + new_mol = mol.copy(deep=True) # The solution includes a part corresponding to rings, y, and a part corresponding to bonds, x, using # nomenclature from the paper. In y, 1 means the ring as a sextet, 0 means it does not. # In x, 1 corresponds to a double bond, 0 either means a single bond or the bond is part of a sextet. - y = solution[0:len(aromatic_rings)] + y = solution[:len(aromatic_rings)] x = solution[len(aromatic_rings):] # Apply results to molecule - double bond locations first @@ -937,7 +940,7 @@ def generate_clar_structures(mol, save_order=False): elif x[index] == 1: bond.order = 2 # double else: - raise ValueError('Unaccepted bond value {0} obtained from optimization.'.format(x[index])) + raise ValueError(f'Unaccepted bond value {x[index]} obtained from optimization.') # Then apply locations of aromatic sextets by converting to benzene bonds for index, ring in enumerate(aromatic_rings): @@ -954,28 +957,25 @@ def generate_clar_structures(mol, save_order=False): return mol_list -def _clar_optimization(mol, constraints=None, max_num=None, save_order=False): +def _clar_optimization(mol, save_order=False): """ Implements linear programming algorithm for finding Clar structures. This algorithm maximizes the number of Clar sextets within the constraints of molecular geometry and atom valency. Returns a list of valid Clar solutions in the form of a tuple, with the following entries: - [0] Molecule object - [1] List of aromatic rings - [2] List of bonds - [3] Optimization solution + [0] List of aromatic rings + [1] List of bonds + [2] Optimization solutions - The optimization solution is a list of boolean values with sextet assignments followed by double bond assignments, - with indices corresponding to the list of aromatic rings and list of bonds, respectively. + The optimization solutions may contain multiple valid solutions, each is an array of boolean values with sextet assignments + followed by double bond assignments, with indices corresponding to the list of aromatic rings and list of bonds, respectively. Method adapted from: Hansen, P.; Zheng, M. The Clar Number of a Benzenoid Hydrocarbon and Linear Programming. J. Math. Chem. 1994, 15 (1), 93–107. """ - cython.declare(molecule=Graph, aromatic_rings=list, exo=list, l=cython.int, m=cython.int, n=cython.int, - a=list, objective=list, status=cython.int, solution=list, innerSolutions=list) - - from lpsolve55 import lpsolve + cython.declare(molecule=Graph, aromatic_rings=list, exo=list, n_rings=cython.int, n_atoms=cython.int, n_bonds=cython.int, + A=list, solutions=tuple) # Make a copy of the molecule so we don't destroy the original molecule = mol.copy(deep=True) @@ -1010,61 +1010,81 @@ def _clar_optimization(mol, constraints=None, max_num=None, save_order=False): exo.append(None) # Dimensions - l = len(aromatic_rings) - m = len(atoms) - n = l + len(bonds) + n_ring = len(aromatic_rings) + n_atom = len(atoms) + n_bond = len(bonds) + + # The aromaticity assignment problem is formulated as an MILP problem + # minimize: + # c @ x + # such that + # b_l <= A @ x <= b_u + # l <= x <= u + # x are integers # Connectivity matrix which indicates which rings and bonds each atom is in - # Part of equality constraint Ax=b - a = [] + # Part of equality constraint A_eq @ x = b_eq + A = [] for atom in atoms: in_ring = [1 if atom in ring else 0 for ring in aromatic_rings] in_bond = [1 if atom in [bond.atom1, bond.atom2] else 0 for bond in bonds] - a.append(in_ring + in_bond) + A.append(in_ring + in_bond) + constraints = [LinearConstraint( + A=np.array(A, dtype=int), lb=np.ones(n_atom, dtype=int), ub=np.ones(n_atom, dtype=int) + )] # Objective vector for optimization: sextets have a weight of 1, double bonds have a weight of 0 - objective = [1] * l + [0] * len(bonds) - - # Solve LP problem using lpsolve - lp = lpsolve('make_lp', m, n) # initialize lp with constraint matrix with m rows and n columns - lpsolve('set_verbose', lp, 2) # reduce messages from lpsolve - lpsolve('set_obj_fn', lp, objective) # set objective function - lpsolve('set_maxim', lp) # set solver to maximize objective - lpsolve('set_mat', lp, a) # set left hand side to constraint matrix - lpsolve('set_rh_vec', lp, [1] * m) # set right hand side to 1 for all constraints - for i in range(m): # set all constraints as equality constraints - lpsolve('set_constr_type', lp, i + 1, '=') - lpsolve('set_binary', lp, [True] * n) # set all variables to be binary - - # Constrain values of exocyclic bonds, since we don't want to modify them - for i in range(l, n): - if exo[i - l] is not None: - # NOTE: lpsolve indexes from 1, so the variable we're changing should be i + 1 - lpsolve('set_bounds', lp, i + 1, exo[i - l], exo[i - l]) - - # Add constraints to problem if provided - if constraints is not None: - for constraint in constraints: - try: - lpsolve('add_constraint', lp, constraint[0], '<=', constraint[1]) - except Exception as e: - logging.debug('Unable to add constraint: {0} <= {1}'.format(constraint[0], constraint[1])) - logging.debug(mol.to_adjacency_list()) - if str(e) == 'invalid vector.': - raise ILPSolutionError('Unable to add constraint, likely due to ' - 'inconsistent aromatic ring perception.') - else: - raise + c = - np.array([1] * n_ring + [0] * n_bond, dtype=int) + + # Variable bounds + bounds = Bounds( + lb=np.array( + [0] * n_ring + [1 if val == 1 else 0 for val in exo] + dtype=int, + ), # lower bounds: 0 except for exo double bonds + ub=np.array( + [1] * n_ring + [0 if val == 0 else 1 for val in exo] + dtype=int, + ), # upper bounds: 1 except for exo single bonds + ) + + solutions = _solve_clar_milp(c, bounds, constraints, n_ring) + + return aromatic_rings, bonds, solutions + + +def _solve_clar_milp( + c, + bounds, + constraints, + n_ring, + max_num=None, +): + """ + A helpful function to solve the formulated clar optimization MILP problem. ``c``, + ``bounds``, and ``constraints`` are computed in _clar_optimization and follow the + definition of their corresponding kwargs in scipy.optimize.milp. ``n_ring`` is the + number of aromatic rings in the molecule. ``max_num`` is the maximum number of sextets + that can be found. If ``max_num`` is None for first run but will be updated during the + recursion and is used to check sub-optimal solution. + """ + # To modify + cython.declare(inner_solutions=list) - status = lpsolve('solve', lp) - obj_val, solution = lpsolve('get_solution', lp)[0:2] - lpsolve('delete_lp', lp) # Delete the LP problem to clear up memory + result = milp( + c=-c, # negative for maximization + integrality=1, + bounds=bounds, + constraints=constraints, + options={'time_limit': 10}, + ) - # Check that optimization was successful - if status != 0: - raise ILPSolutionError('Optimization could not find a valid solution.') + if result.status != 0: + raise RuntimeError("Optimization could not find a valid solution.") - # Check that we the result contains at least one aromatic sextet + obj_val, solution = -result.fun, result.x + + # Check that the result contains at least one aromatic sextet if obj_val == 0: return [] @@ -1072,27 +1092,27 @@ def _clar_optimization(mol, constraints=None, max_num=None, save_order=False): if max_num is None: max_num = obj_val # This is the first solution, so the result should be an upper limit elif obj_val < max_num: - raise ILPSolutionError('Optimization obtained a sub-optimal solution.') + raise RuntimeError("Optimization obtained a sub-optimal solution.") if any([x != 1 and x != 0 for x in solution]): - raise ILPSolutionError('Optimization obtained a non-integer solution.') + raise RuntimeError('Optimization obtained a non-integer solution.') # Generate constraints based on the solution obtained - y = solution[0:l] - new_a = y + [0] * len(bonds) - new_b = sum(y) - 1 - if constraints is not None: - constraints.append((new_a, new_b)) - else: - constraints = [(new_a, new_b)] + y = solution[0: n_ring] + constraints.append( + LinearConstraint( + A=np.hstack([y, [0] * (solution.shape[0] - n_ring)]), + ub=sum(y) - 1, + ), + ) # Run optimization with additional constraints try: - inner_solutions = _clar_optimization(mol, constraints=constraints, max_num=max_num, save_order=save_order) - except ILPSolutionError: + inner_solutions = _solve_clar_milp(c, bounds, constraints, n_ring, max_num) + except RuntimeError: inner_solutions = [] - return inner_solutions + [(molecule, aromatic_rings, bonds, solution)] + return inner_solutions + [solution] def _clar_transformation(mol, aromatic_ring): From fbf14e6ba1e0b5321c5647072cec0cf53acc1d9c Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 4 Mar 2024 23:38:07 -0500 Subject: [PATCH 2/5] Update pxd of clar optimization after reimplement with scipy MILP --- rmgpy/molecule/resonance.pxd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/resonance.pxd b/rmgpy/molecule/resonance.pxd index e3b01db841e..fdd481b4c0e 100644 --- a/rmgpy/molecule/resonance.pxd +++ b/rmgpy/molecule/resonance.pxd @@ -25,6 +25,8 @@ # # ############################################################################### +cimport numpy as cnp + from rmgpy.molecule.graph cimport Vertex, Edge, Graph from rmgpy.molecule.molecule cimport Atom, Bond, Molecule @@ -60,6 +62,8 @@ cpdef list generate_kekule_structure(Graph mol) cpdef list generate_clar_structures(Graph mol, bint save_order=?) -cpdef list _clar_optimization(Graph mol, list constraints=?, max_num=?, save_order=?) +cpdef list _clar_optimization(Graph mol, bint save_order=?) + +cpdef list _solve_clar_milp(cnp.ndarray[cnp.int_t, ndim=1] c, bounds, list constraints, int n_ring, max_num=?) cpdef list _clar_transformation(Graph mol, list aromatic_ring) From c63abb840a4b40ba91b816d8e326b124b529e042 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 4 Mar 2024 23:38:35 -0500 Subject: [PATCH 3/5] Remove the redundant ILPSolutionError --- rmgpy/exceptions.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/rmgpy/exceptions.py b/rmgpy/exceptions.py index 7b4420cf66a..f2e7dae4602 100644 --- a/rmgpy/exceptions.py +++ b/rmgpy/exceptions.py @@ -109,15 +109,6 @@ class ForbiddenStructureException(Exception): pass -class ILPSolutionError(Exception): - """ - An exception to be raised when solving an integer linear programming problem if a solution - could not be found or the solution is not valid. Can pass a string to indicate the reason - that the solution is invalid. - """ - pass - - class ImplicitBenzeneError(Exception): """ An exception class when encountering a group with too many implicit benzene From f15200abf7c8bbc0de9f093628260651eaec082b Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Tue, 5 Mar 2024 00:39:43 -0500 Subject: [PATCH 4/5] Fix typos in resonance clar optimization --- rmgpy/molecule/resonance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index bdab0999bea..1f0a275c3ed 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -1039,11 +1039,11 @@ def _clar_optimization(mol, save_order=False): # Variable bounds bounds = Bounds( lb=np.array( - [0] * n_ring + [1 if val == 1 else 0 for val in exo] + [0] * n_ring + [1 if val == 1 else 0 for val in exo], dtype=int, ), # lower bounds: 0 except for exo double bonds ub=np.array( - [1] * n_ring + [0 if val == 0 else 1 for val in exo] + [1] * n_ring + [0 if val == 0 else 1 for val in exo], dtype=int, ), # upper bounds: 1 except for exo single bonds ) From e3c8ca87b5cc69dab21217104ad898dde34ae6ea Mon Sep 17 00:00:00 2001 From: Xiaorui Dong <35771233+xiaoruiDong@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:27:15 -0500 Subject: [PATCH 5/5] Fix list style in clar optimization Co-authored-by: Hao-Wei Pang <45482070+hwpang@users.noreply.github.com> --- rmgpy/molecule/resonance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index 1f0a275c3ed..9b7ec30e73e 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -1098,7 +1098,7 @@ def _solve_clar_milp( raise RuntimeError('Optimization obtained a non-integer solution.') # Generate constraints based on the solution obtained - y = solution[0: n_ring] + y = solution[:n_ring] constraints.append( LinearConstraint( A=np.hstack([y, [0] * (solution.shape[0] - n_ring)]),