Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Lint with Ruff
on:
push:
branches:
- main
- master
pull_request:
jobs:
lint:
Expand Down
5 changes: 3 additions & 2 deletions examples/example_deco.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import numpy as np
from qstack import compound, fields
from qstack.fields import moments
from qstack.fields.decomposition import decompose, correct_N, correct_N_atomic
from qstack.fields.density2file import coeffs_to_cube, coeffs_to_molden

Expand All @@ -10,7 +11,7 @@
auxmol, c = decompose(mol, dm, 'cc-pvqz jkfit')
print("Expansion Coefficients:", c)

N = fields.decomposition.number_of_electrons_deco(auxmol, c)
N = moments.r2_c(auxmol, c, moments=[0])[0]

print("Number of electrons after decomposition: ", N)

Expand All @@ -21,7 +22,7 @@
print('density saved to H2O.molden')

c = correct_N(auxmol, c)
N = fields.decomposition.number_of_electrons_deco(auxmol, c)
N = moments.r2_c(auxmol, c, moments=[0])[0]
print(N)


Expand Down
2 changes: 2 additions & 0 deletions qstack/basis_opt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
"""Basis set optimization module."""

from . import opt
from . import basis_tools
51 changes: 35 additions & 16 deletions qstack/basis_opt/basis_tools.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
"""Utility functions for basis set manipulation."""

import copy
import numpy as np
from pyscf import df, dft


def energy_mol(newbasis, moldata):
"""Computes overlap and 2-/3-centers ERI matrices.
"""Compute loss function (fitting error) for one molecule.

Args:
mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix.
auxmol (pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set.
newbasis (dict): Basis set.
moldata (dict): Dictionary containing molecular data.

Returns:
numpy ndarray: Overlap matrix, 2-centers and 3-centers ERI matrices.

float: Loss function value for the given basis.
"""
mol = moldata['mol' ]
rho = moldata['rho' ]
Expand All @@ -31,16 +32,18 @@ def energy_mol(newbasis, moldata):


def gradient_mol(nexp, newbasis, moldata):
"""
"""Compute loss function and gradient for one molecule.

Args:
nexp():
newbasis():
moldata(pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set
nexp (int): Number of exponents.
newbasis (dict): Basis set.
moldata (dict): Dictionary containing molecular data.

Returns:
tuple: A tuple containing:
- E (float): Loss function value.
- dE_da (numpy.ndarray): Gradient of loss function with respect to exponents.
"""

mol = moldata['mol' ]
rho = moldata['rho' ]
coords = moldata['coords' ]
Expand Down Expand Up @@ -89,15 +92,15 @@ def gradient_mol(nexp, newbasis, moldata):


def exp2basis(exponents, elements, basis):
"""
"""Convert exponents array to basis set format.

Argas:
exponents():
elements():
basis():
Args:
exponents (numpy.ndarray): Array of basis function exponents.
elements (list): List of elements for which change the basis.
basis (dict): Template basis set definition.

Returns:
newbasis():
dict: New basis set with updated exponents.
"""
i = 0
newbasis = copy.deepcopy(basis)
Expand All @@ -109,6 +112,16 @@ def exp2basis(exponents, elements, basis):


def cut_myelements(x, myelements, bf_bounds):
"""Extract subset of array corresponding to specified elements.

Args:
x (numpy.ndarray): Input array.
myelements (list): List of element symbols to extract.
bf_bounds (dict): Dictionary mapping elements to their basis set bound indices.

Returns:
numpy.ndarray: Array containing x only for specified elements.
"""
x1 = []
for q in myelements:
bounds = bf_bounds[q]
Expand All @@ -118,6 +131,12 @@ def cut_myelements(x, myelements, bf_bounds):


def printbasis(basis, f):
"""Print basis set in JSON-like format to file.

Args:
basis (dict): Basis set definition.
f (file): File object to write to.
"""
print('{', file=f)
for q, b in basis.items():
print(' "'+q+'": [', file=f)
Expand Down
98 changes: 79 additions & 19 deletions qstack/basis_opt/opt.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,41 @@
"""Basis set optimization routines and command-line interface."""

import sys
import argparse
from ast import literal_eval
import numpy as np
import scipy.optimize
from pyscf import gto
import pyscf.data
from ..compound import basis_flatten
from . import basis_tools as qbbt


def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False):
""" Optimize a given basis set.
"""Optimize a given basis set.

Args:
elements_in (str):
basis_in (str or dict): Basis set
molecules_in (dict): which contains the cartesian coordinates of the molecule (string) with the key 'atom', the uncorrelated on-top pair density on a grid (numpy array) with the key 'rho', the grid coordinates (numpy array) with the key 'coords', and the grid weights (numpy array) with the key 'weight'.
elements_in (str): List of elements to optimize. If None, optimize all elements in the basis.
basis_in (list): List of files paths (str) or dicts containing basis set(s).
molecules_in (list): List of file paths (str) or dicts containing molecular data.
gtol_in (float): Gradient norm must be less than gtol_in before successful termination (minimization).
method_in (str): Type of solver. Check scipy.optimize.minimize for full documentation.
printlvl (int):
check (bool):
printlvl (int): Level of printing during optimization (0: none, 1: final basis, 2: detailed).
check (bool): If True, compute and return both analytical and numerical gradients without optimization.

Returns:
Dictionary containing the optimized basis.

"""
def energy(x):
"""Compute total loss function (fitting error) for given exponents.

Args:
x (numpy.ndarray): Log of exponents.

def energy(x):
Returns:
float: Loss function value.
"""
exponents = np.exp(x)
newbasis = qbbt.exp2basis(exponents, myelements, basis)
E = 0.0
Expand All @@ -34,6 +44,16 @@ def energy(x):
return E

def gradient(x):
"""Compute total loss function (fitting error) and gradient for given exponents.

Args:
x (numpy.ndarray): Log of exponents.

Returns:
tuple: A tuple containing:
- E (float): Loss function value.
- dE_dx (numpy.ndarray): Gradient with respect to log(exponents).
"""
exponents = np.exp(x)
newbasis = qbbt.exp2basis(exponents, myelements, basis)

Expand All @@ -56,9 +76,28 @@ def gradient(x):
return E, dE_dx

def gradient_only(x):
"""Compute only the gradient of the loss function (wrapper for optimization algorithms).

Args:
x (numpy.ndarray): Log of exponents.

Returns:
numpy.ndarray: Gradient with respect to log(exponents).
"""
return gradient(x)[1]

def read_bases(basis_files):
"""Read basis set definitions from files or dicts.

Args:
basis_files (list): List of file paths (str) or basis dicts.

Returns:
dict: Combined basis set definition.

Raises:
RuntimeError: If multiple sets for the same element are provided.
"""
basis = {}
for i in basis_files:
if isinstance(i, str):
Expand All @@ -76,6 +115,11 @@ def read_bases(basis_files):
return basis

def make_bf_start():
"""Create basis function index bounds for each element.

Returns:
dict: Dictionary mapping elements to their [start, end] indices.
"""
nbf = [len(basis[q]) for q in elements]
bf_bounds = {}
for i, q in enumerate(elements):
Expand All @@ -84,6 +128,23 @@ def make_bf_start():
return bf_bounds

def make_moldata(fname):
"""Create molecular data dictionary from file or dict.

Args:
fname (str or dict): Path to .npz file or dictionary containing molecular structure,
grid coordinates and weights, and reference density evaluated on it.

Returns:
dict: Dictionary containing:
mol (pyscf Mole): pyscf Mole object.
rho (numpy.ndarray): Reference density values on the grid.
coords (numpy.ndarray): Grid coordinates.
weights (numpy.ndarray): Grid weights.
self (float): Integral of the squared reference density.
idx (numpy.ndarray): Basis function indices for each AO.
centers (list): Atomic center indices for each AO.
distances (numpy.ndarray): Squared distances from each atom to each grid point.
"""
if isinstance(fname, str):
rho_data = np.load(fname)
else:
Expand All @@ -96,16 +157,15 @@ def make_moldata(fname):
self = np.einsum('p,p,p->', weights, rho, rho)
mol = gto.M(atom=str(molecule), basis=basis)

idx = []
centers = []
for iat in range(mol.natm):
q = mol._atom[iat][0]
ib0 = bf_bounds[q][0]
for ib, b in enumerate(mol._basis[q]):
l = b[0]
idx += [ib+ib0] * (2*l+1)
centers += [iat] * (2*l+1)
idx = np.array(idx)
centers, l, _ = basis_flatten(mol, return_both=False)
idx = np.zeros_like(centers)
i = 0
while i < mol.nao:
q = mol.atom_symbol(centers[i])
for ib in range(*bf_bounds[q]):
msize = 2*l[i]+1
idx[i:i+msize] = [ib] * msize
i += msize

distances = np.zeros((mol.natm, len(rho)))
for iat in range(mol.natm):
Expand Down Expand Up @@ -166,9 +226,9 @@ def make_moldata(fname):

return newbasis

def main():
import argparse

def main():
"""Run basis set optimization via command-line interface."""
parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.')
parser.add_argument('--elements', type=str, dest='elements', nargs='+', help='elements for optimization')
parser.add_argument('--basis', type=str, dest='basis', nargs='+', help='initial df bases', required=True)
Expand Down
60 changes: 57 additions & 3 deletions qstack/c2mio.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Converter from cell2mol Cell objects to PySCF Mole."""

import sys
import os
import io
Expand All @@ -7,15 +9,44 @@


def get_cell2mol_xyz(mol):
"""Extract XYZ coordinates, charge, and spin from a cell2mol object.

Args:
mol: cell2mol molecule or ligand object.

Returns:
tuple: A tuple containing:
- xyz (str): XYZ coordinate string.
- charge (int): Total charge of the molecule.
- spin (int): Number of unpaired electrons of the molecule (multiplicity - 1)
for molecules and None for ligands.
"""
f = io.StringIO()
sys.stdout, stdout = f, sys.stdout
mol.print_xyz()
xyz, sys.stdout = f.getvalue(), stdout
f.close()
return xyz, mol.totcharge, (mol.get_spin()-1 if hasattr(mol, 'get_spin') else 0)
return xyz, mol.totcharge, (mol.get_spin()-1 if hasattr(mol, 'get_spin') else None)


def get_cell(fpath, workdir='.'):
"""Load a unit cell from a .cell or .cif file.

If a .cif file is provided, the function checks for a corresponding .cell file
in the working directory. If it exists, it loads the .cell file; otherwise, it
calls cell2mol to process the .cif file to generate the unit cell.

Args:
fpath (str): Path to the input file (.cell or .cif).
workdir (str): Directory to read / write .cell file and logs if a .cif file
is provided. Defaults to '.'.

Returns:
cell2mol.unitcell: Unit cell object.

Raises:
NotImplementedError: If the file extension is not .cell or .cif.
"""
ext = os.path.splitext(fpath)[-1]
if ext=='.cell':
cell = load_binary(fpath)
Expand All @@ -32,12 +63,35 @@ def get_cell(fpath, workdir='.'):


def get_mol(cell, mol_idx=0, basis='minao', ecp=None):
"""Convert a molecule in a cell2mol unit cell object to a pyscf Mole object.

Args:
cell: cell2mol unit cell object.
mol_idx (int): Index of the molecule in the cell. Defaults to 0.
basis (str or dict): Basis set. Defaults to 'minao'.
ecp (str): Effective core potential. Defaults to None.

Returns:
pyscf.gto.Mole: pyscf Mole object for the molecule.
"""
mol = cell.moleclist[mol_idx]
xyz, charge, spin = get_cell2mol_xyz(mol)
return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True)
return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp)


def get_ligand(cell, mol_idx=0, lig_idx=0, basis='minao', ecp=None):
"""Convert a ligand in a cell2mol unit cell object to a pyscf Mole object.

Args:
cell: cell2mol unit cell object.
mol_idx (int): Index of the molecule in the cell. Defaults to 0.
lig_idx (int): Index of the ligand in the molecule. Defaults to 0.
basis (str or dict): Basis set. Defaults to 'minao'.
ecp (str): Effective core potential. Defaults to None.

Returns:
pyscf.gto.Mole: pyscf Mole object for the ligand.
"""
mol = cell.moleclist[mol_idx].ligands[lig_idx]
xyz, charge, spin = get_cell2mol_xyz(mol)
return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True)
return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp)
Loading
Loading