diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 89cb33f6..e063dd51 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,7 +3,7 @@ name: Lint with Ruff on: push: branches: - - main + - master pull_request: jobs: lint: diff --git a/examples/example_deco.py b/examples/example_deco.py index 2f77d542..cbedb85e 100644 --- a/examples/example_deco.py +++ b/examples/example_deco.py @@ -1,6 +1,7 @@ import os import numpy as np from qstack import compound, fields +from qstack.fields import moments from qstack.fields.decomposition import decompose, correct_N, correct_N_atomic from qstack.fields.density2file import coeffs_to_cube, coeffs_to_molden @@ -10,7 +11,7 @@ auxmol, c = decompose(mol, dm, 'cc-pvqz jkfit') print("Expansion Coefficients:", c) -N = fields.decomposition.number_of_electrons_deco(auxmol, c) +N = moments.r2_c(auxmol, c, moments=[0])[0] print("Number of electrons after decomposition: ", N) @@ -21,7 +22,7 @@ print('density saved to H2O.molden') c = correct_N(auxmol, c) -N = fields.decomposition.number_of_electrons_deco(auxmol, c) +N = moments.r2_c(auxmol, c, moments=[0])[0] print(N) diff --git a/qstack/basis_opt/__init__.py b/qstack/basis_opt/__init__.py index 7f71607c..85a9221e 100644 --- a/qstack/basis_opt/__init__.py +++ b/qstack/basis_opt/__init__.py @@ -1,2 +1,4 @@ +"""Basis set optimization module.""" + from . import opt from . import basis_tools diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index c6481f76..5f6fcaef 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -1,18 +1,19 @@ +"""Utility functions for basis set manipulation.""" + import copy import numpy as np from pyscf import df, dft def energy_mol(newbasis, moldata): - """Computes overlap and 2-/3-centers ERI matrices. + """Compute loss function (fitting error) for one molecule. Args: - mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. 
- auxmol (pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set. + newbasis (dict): Basis set. + moldata (dict): Dictionary containing molecular data. Returns: - numpy ndarray: Overlap matrix, 2-centers and 3-centers ERI matrices. - + float: Loss function value for the given basis. """ mol = moldata['mol' ] rho = moldata['rho' ] @@ -31,16 +32,18 @@ def energy_mol(newbasis, moldata): def gradient_mol(nexp, newbasis, moldata): - """ + """Compute loss function and gradient for one molecule. Args: - nexp(): - newbasis(): - moldata(pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set + nexp (int): Number of exponents. + newbasis (dict): Basis set. + moldata (dict): Dictionary containing molecular data. Returns: + tuple: A tuple containing: + - E (float): Loss function value. + - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents. """ - mol = moldata['mol' ] rho = moldata['rho' ] coords = moldata['coords' ] @@ -89,15 +92,15 @@ def gradient_mol(nexp, newbasis, moldata): def exp2basis(exponents, elements, basis): - """ + """Convert exponents array to basis set format. - Argas: - exponents(): - elements(): - basis(): + Args: + exponents (numpy.ndarray): Array of basis function exponents. + elements (list): List of elements for which to change the basis. + basis (dict): Template basis set definition. Returns: - newbasis(): + dict: New basis set with updated exponents. """ i = 0 newbasis = copy.deepcopy(basis) @@ -109,6 +112,16 @@ def exp2basis(exponents, elements, basis): def cut_myelements(x, myelements, bf_bounds): + """Extract subset of array corresponding to specified elements. + + Args: + x (numpy.ndarray): Input array. + myelements (list): List of element symbols to extract. + bf_bounds (dict): Dictionary mapping elements to their basis set bound indices. + + Returns: + numpy.ndarray: Array containing x only for specified elements. 
+ """ x1 = [] for q in myelements: bounds = bf_bounds[q] @@ -118,6 +131,12 @@ def cut_myelements(x, myelements, bf_bounds): def printbasis(basis, f): + """Print basis set in JSON-like format to file. + + Args: + basis (dict): Basis set definition. + f (file): File object to write to. + """ print('{', file=f) for q, b in basis.items(): print(' "'+q+'": [', file=f) diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index f6fc7d53..75f0d443 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -1,31 +1,41 @@ +"""Basis set optimization routines and command-line interface.""" + import sys +import argparse from ast import literal_eval import numpy as np import scipy.optimize from pyscf import gto import pyscf.data +from ..compound import basis_flatten from . import basis_tools as qbbt def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False): - """ Optimize a given basis set. + """Optimize a given basis set. Args: - elements_in (str): - basis_in (str or dict): Basis set - molecules_in (dict): which contains the cartesian coordinates of the molecule (string) with the key 'atom', the uncorrelated on-top pair density on a grid (numpy array) with the key 'rho', the grid coordinates (numpy array) with the key 'coords', and the grid weights (numpy array) with the key 'weight'. + elements_in (str): List of elements to optimize. If None, optimize all elements in the basis. + basis_in (list): List of files paths (str) or dicts containing basis set(s). + molecules_in (list): List of file paths (str) or dicts containing molecular data. gtol_in (float): Gradient norm must be less than gtol_in before successful termination (minimization). method_in (str): Type of solver. Check scipy.optimize.minimize for full documentation. - printlvl (int): - check (bool): + printlvl (int): Level of printing during optimization (0: none, 1: final basis, 2: detailed). 
+ check (bool): If True, compute and return both analytical and numerical gradients without optimization. Returns: Dictionary containing the optimized basis. """ + def energy(x): + """Compute total loss function (fitting error) for given exponents. + Args: + x (numpy.ndarray): Log of exponents. - def energy(x): + Returns: + float: Loss function value. + """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) E = 0.0 @@ -34,6 +44,16 @@ def energy(x): return E def gradient(x): + """Compute total loss function (fitting error) and gradient for given exponents. + + Args: + x (numpy.ndarray): Log of exponents. + + Returns: + tuple: A tuple containing: + - E (float): Loss function value. + - dE_dx (numpy.ndarray): Gradient with respect to log(exponents). + """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) @@ -56,9 +76,28 @@ def gradient(x): return E, dE_dx def gradient_only(x): + """Compute only the gradient of the loss function (wrapper for optimization algorithms). + + Args: + x (numpy.ndarray): Log of exponents. + + Returns: + numpy.ndarray: Gradient with respect to log(exponents). + """ return gradient(x)[1] def read_bases(basis_files): + """Read basis set definitions from files or dicts. + + Args: + basis_files (list): List of file paths (str) or basis dicts. + + Returns: + dict: Combined basis set definition. + + Raises: + RuntimeError: If multiple sets for the same element are provided. + """ basis = {} for i in basis_files: if isinstance(i, str): @@ -76,6 +115,11 @@ def read_bases(basis_files): return basis def make_bf_start(): + """Create basis function index bounds for each element. + + Returns: + dict: Dictionary mapping elements to their [start, end] indices. + """ nbf = [len(basis[q]) for q in elements] bf_bounds = {} for i, q in enumerate(elements): @@ -84,6 +128,23 @@ def make_bf_start(): return bf_bounds def make_moldata(fname): + """Create molecular data dictionary from file or dict. 
+ + Args: + fname (str or dict): Path to .npz file or dictionary containing molecular structure, + grid coordinates and weights, and reference density evaluated on it. + + Returns: + dict: Dictionary containing: + mol (pyscf Mole): pyscf Mole object. + rho (numpy.ndarray): Reference density values on the grid. + coords (numpy.ndarray): Grid coordinates. + weights (numpy.ndarray): Grid weights. + self (float): Integral of the squared reference density. + idx (numpy.ndarray): Basis function indices for each AO. + centers (list): Atomic center indices for each AO. + distances (numpy.ndarray): Squared distances from each atom to each grid point. + """ if isinstance(fname, str): rho_data = np.load(fname) else: @@ -96,16 +157,15 @@ def make_moldata(fname): self = np.einsum('p,p,p->', weights, rho, rho) mol = gto.M(atom=str(molecule), basis=basis) - idx = [] - centers = [] - for iat in range(mol.natm): - q = mol._atom[iat][0] - ib0 = bf_bounds[q][0] - for ib, b in enumerate(mol._basis[q]): - l = b[0] - idx += [ib+ib0] * (2*l+1) - centers += [iat] * (2*l+1) - idx = np.array(idx) + centers, l, _ = basis_flatten(mol, return_both=False) + idx = np.zeros_like(centers) + i = 0 + while i < mol.nao: + q = mol.atom_symbol(centers[i]) + for ib in range(*bf_bounds[q]): + msize = 2*l[i]+1 + idx[i:i+msize] = [ib] * msize + i += msize distances = np.zeros((mol.natm, len(rho))) for iat in range(mol.natm): @@ -166,9 +226,9 @@ def make_moldata(fname): return newbasis -def main(): - import argparse +def main(): + """Run basis set optimization via command-line interface.""" parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.') parser.add_argument('--elements', type=str, dest='elements', nargs='+', help='elements for optimization') parser.add_argument('--basis', type=str, dest='basis', nargs='+', help='initial df bases', required=True) diff --git a/qstack/c2mio.py b/qstack/c2mio.py index e8fbc0e5..1c0dbc5b 100644 --- a/qstack/c2mio.py +++ b/qstack/c2mio.py @@ 
-1,3 +1,5 @@ +"""Converter from cell2mol Cell objects to PySCF Mole.""" + import sys import os import io @@ -7,15 +9,44 @@ def get_cell2mol_xyz(mol): + """Extract XYZ coordinates, charge, and spin from a cell2mol object. + + Args: + mol: cell2mol molecule or ligand object. + + Returns: + tuple: A tuple containing: + - xyz (str): XYZ coordinate string. + - charge (int): Total charge of the molecule. + - spin (int): Number of unpaired electrons of the molecule (multiplicity - 1) + for molecules and None for ligands. + """ f = io.StringIO() sys.stdout, stdout = f, sys.stdout mol.print_xyz() xyz, sys.stdout = f.getvalue(), stdout f.close() - return xyz, mol.totcharge, (mol.get_spin()-1 if hasattr(mol, 'get_spin') else 0) + return xyz, mol.totcharge, (mol.get_spin()-1 if hasattr(mol, 'get_spin') else None) def get_cell(fpath, workdir='.'): + """Load a unit cell from a .cell or .cif file. + + If a .cif file is provided, the function checks for a corresponding .cell file + in the working directory. If it exists, it loads the .cell file; otherwise, it + calls cell2mol to process the .cif file to generate the unit cell. + + Args: + fpath (str): Path to the input file (.cell or .cif). + workdir (str): Directory to read / write .cell file and logs if a .cif file + is provided. Defaults to '.'. + + Returns: + cell2mol.unitcell: Unit cell object. + + Raises: + NotImplementedError: If the file extension is not .cell or .cif. + """ ext = os.path.splitext(fpath)[-1] if ext=='.cell': cell = load_binary(fpath) @@ -32,12 +63,35 @@ def get_cell(fpath, workdir='.'): def get_mol(cell, mol_idx=0, basis='minao', ecp=None): + """Convert a molecule in a cell2mol unit cell object to a pyscf Mole object. + + Args: + cell: cell2mol unit cell object. + mol_idx (int): Index of the molecule in the cell. Defaults to 0. + basis (str or dict): Basis set. Defaults to 'minao'. + ecp (str): Effective core potential. Defaults to None. + + Returns: + pyscf.gto.Mole: pyscf Mole object for the molecule. 
+ """ mol = cell.moleclist[mol_idx] xyz, charge, spin = get_cell2mol_xyz(mol) - return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True) + return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp) def get_ligand(cell, mol_idx=0, lig_idx=0, basis='minao', ecp=None): + """Convert a ligand in a cell2mol unit cell object to a pyscf Mole object. + + Args: + cell: cell2mol unit cell object. + mol_idx (int): Index of the molecule in the cell. Defaults to 0. + lig_idx (int): Index of the ligand in the molecule. Defaults to 0. + basis (str or dict): Basis set. Defaults to 'minao'. + ecp (str): Effective core potential. Defaults to None. + + Returns: + pyscf.gto.Mole: pyscf Mole object for the ligand. + """ mol = cell.moleclist[mol_idx].ligands[lig_idx] xyz, charge, spin = get_cell2mol_xyz(mol) - return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True) + return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp) diff --git a/qstack/compound.py b/qstack/compound.py index 1019cc7f..2c6470de 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -1,6 +1,4 @@ -""" -Module containing all the operations to load, transform, and save molecular objects. 
-""" +"""Molecular structure parsing and manipulation.""" import json import re @@ -8,12 +6,14 @@ import numpy as np from pyscf import gto, data from qstack import constants -from qstack.tools import rotate_euler +from qstack.reorder import get_mrange +from qstack.mathutils.array import stack_padding +from qstack.mathutils.rotation_matrix import rotate_euler +from qstack.tools import Cursor # detects a charge-spin line, containing only two ints (one positive or negative, the other positive and nonzero) _re_spincharge = re.compile(r'(?P<charge>[-+]?[0-9]+)\s+(?P<spinmult>[1-9][0-9]*)') - # fetches a single key=value or key:value pair, then matches a full line, for space-separated pairs _re_singlekey = re.compile(r'\s*(?P<key>\w+)[=:](?P<val>[^\s,]+)\s*') _re_keyline = re.compile(r'\s*(\w+[=:][^\s,]+\s+)*(\w+[=:][^\s,]+)\s*') @@ -25,21 +25,29 @@ # matches a floating-point number in any format python reads _re_float = re.compile(r'[+-]?[0-9]*?([0-9]\.|\.[0-9]|[0-9])[0-9]*?([eEdD][+-]?[0-9]+)?') + def xyz_comment_line_parser(line): - """reads the 'comment' line of a XYZ file, and tries to infer its meaning""" + """Read the 'comment' line of a XYZ file and try to infer its meaning. + + Args: + line (str): Comment line from XYZ file. + + Returns: + dict: Dictionary containing parsed properties (charge, spin, etc.). 
+ """ line = line.strip() if line == '': return {} elif _re_spincharge.fullmatch(line): # possibility 1: the line only has charge and spin multiplicity + # note: this skips the futher processing matcher = _re_spincharge.fullmatch(line) spinmult = int(matcher.group('spinmult')) charge = int(matcher.group('charge')) - # note: this skips the futher processing return {'charge':charge, 'spin':spinmult-1} elif _re_keyline.fullmatch(line): # possibility 2: space-separated key/value pairs - line_parts = line.split() # split across any whitespace + line_parts = line.split() part_matching = _re_singlekey props = {} elif _re_keyline2.fullmatch(line): @@ -76,50 +84,52 @@ def xyz_comment_line_parser(line): val = float(val) props[part_matcher.group('key')] = val - if 'spin' in props: # we want a difference in electons (alpha-beta), but we expect the file to contain a spin multiplicity props['spin'] = props['spin']-1 return props -def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit=None, ecp=None, parse_comment=False, read_string=False): - """Reads a molecular file in xyz format and returns a pyscf Mole object. + +def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit=None, ecp=None, parse_comment=False): + """Read a molecular file in xyz format and returns a pyscf Mole object. Args: - inp (str): path of the xyz file to read / xyz fine contents if read_string==True - basis (str or dict): Basis set. - charge (int): Provide/override charge of the molecule. - spin (int): Provide/override spin of the molecule (alpha electrons - beta electrons). - ignore (bool): If assume molecule closed-shell an assign charge either 0 or -1 - unit (str): Provide/override units (Ang or Bohr) - ecp (str) : ECP to use + inp (str): Path of the xyz file to read, or xyz file contents. + basis (str or dict): Basis set. Defaults to "def2-svp". + charge (int): Provide/override charge of the molecule. Defaults to None. 
+ spin (int): Provide/override spin of the molecule (alpha electrons - beta electrons). Defaults to None. + ignore (bool): If True, assume molecule is closed-shell and assign charge either 0 or -1. Defaults to False. + unit (str): Provide/override units (Ang or Bohr). Defaults to None. + ecp (str): ECP to use. Defaults to None. + parse_comment (bool): Whether to parse the comment line for properties. Defaults to False. Returns: - A pyscf Mole object containing the molecule information. - """ + pyscf.gto.Mole: pyscf Mole object containing the molecule information. - if read_string: + Raises: + RuntimeError: If units are not recognized or if minao basis requires ECP for heavy atoms. + """ + if '\n' in inp: molxyz = gto.fromstring(inp) else: molxyz = gto.fromfile(inp) if parse_comment: - if read_string: + if '\n' in inp: comment_line = inp.split('\n')[1] else: with open(inp) as f: - _ = f.readline() - comment_line = f.readline() + _, comment_line = f.readline(), f.readline() props = xyz_comment_line_parser(comment_line) else: - props = [None] + props = {} - # Define attributes to the Mole object and build it mol = gto.Mole() mol.atom = molxyz mol.basis = basis + if ecp is not None: + mol.ecp = ecp - # Check the units for the pyscf driver if unit is not None: pass elif 'unit' in props: @@ -128,7 +138,7 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit unit = 'Angstrom' unit = unit.upper()[0] if unit not in ['B', 'A']: - raise RuntimeError("Unknown units (use Ängstrom or Bohr)") + raise RuntimeError("Unknown units (use A[ngstrom] or B[ohr])") mol.unit = unit if ignore: @@ -142,8 +152,6 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit elif 'charge' in props: mol.charge = props['charge'] else: - # no ignore, no charge/spin specified: - # let's hope we have a set of neutral, closed shell compounds! 
mol.charge = 0 if spin is not None: @@ -153,9 +161,6 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit else: mol.spin = 0 - if ecp is not None: - mol.ecp = ecp - mol.build() species_charges = [data.elements.charge(z) for z in mol.elements] if mol.basis == 'minao' and ecp is None and (np.array(species_charges) > 36).any(): @@ -165,51 +170,45 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit def mol_to_xyz(mol, fout, fmt="xyz"): - """Converts a pyscf Mole object into a molecular file in xyz format. + """Convert a pyscf Mole object into a molecular file in xyz format. Args: - pyscf Mole: pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. fout (str): Name (including path) of the xyz file to write. + fmt (str): Output format. Defaults to "xyz". Returns: - A file in xyz format containing the charge, total spin and molecular coordinates. - """ + str: String containing the xyz formatted data. + Raises: + NotImplementedError: If fmt is not "xyz". + """ fmt = fmt.lower() + output = [] if fmt == "xyz": coords = mol.atom_coords() * constants.BOHR2ANGS - output = [] - if fmt == "xyz": - output.append(str(mol.natm)) - output.append(f"{mol.charge} {mol.multiplicity}") - - for i in range(mol.natm): - symb = mol.atom_pure_symbol(i) - x, y, z = coords[i] - output.append(f"{symb:4s} {x:14.5f} {y:14.5f} {z:14.5f}") - string = "\n".join(output) - + output.append(f"{mol.natm}\n{mol.charge} {mol.multiplicity}") + output.extend([f"{mol.atom_pure_symbol(i):4s} {r[0]:14.5f} {r[1]:14.5f} {r[2]:14.5f}" for i, r in enumerate(coords)]) + output = "\n".join(output) else: raise NotImplementedError with open(fout, "w") as f: - f.write(string) - f.write("\n") - return string + f.write(output+"\n") + return output def make_auxmol(mol, basis, copy_ecp=False): - """Builds an auxiliary Mole object given a basis set and a pyscf Mole object. + """Build an auxiliary Mole object given a basis set and a pyscf Mole object. 
Args: - mol (pyscf Mole): Original pyscf Mole object. + mol (pyscf.gto.Mole): Original pyscf Mole object. basis (str or dict): Basis set. + copy_ecp (bool): Whether to copy ECP from original molecule. Defaults to False. Returns: - An auxiliary pyscf Mole object. + pyscf.gto.Mole: Auxiliary pyscf Mole object. """ - - # Define attributes to the auxiliary Mole object and build it auxmol = gto.Mole() auxmol.atom = mol.atom auxmol.charge = mol.charge @@ -218,108 +217,110 @@ def make_auxmol(mol, basis, copy_ecp=False): if copy_ecp: auxmol.ecp = mol.ecp auxmol.build() - return auxmol def rotate_molecule(mol, a, b, g, rad=False): - """Rotate a molecule: transform nuclear coordinates given a set of Euler angles. + """Rotate a molecule: transform nuclear coordinates given a set of Cardan angles. Args: - mol (pyscf Mole): Original pyscf Mole object. + mol (pyscf.gto.Mole): Original pyscf Mole object. a (float): Alpha Euler angle. b (float): Beta Euler angle. g (float): Gamma Euler angle. - rad (bool) : Wheter the angles are in radians or not. - + rad (bool): Whether the angles are in radians. Defaults to False (degrees). Returns: - A pyscf Mole object with transformed coordinates. + pyscf.gto.Mole: pyscf Mole object with transformed coordinates. """ - - orig_coords = mol.atom_coords() - rotated_coords = orig_coords @ rotate_euler(a, b, g, rad) * constants.BOHR2ANGS - atom_types = mol.elements - + rotated_coords = mol.atom_coords() @ rotate_euler(a, b, g, rad) * constants.BOHR2ANGS rotated_mol = gto.Mole() - rotated_mol.atom = list(zip(atom_types, rotated_coords.tolist(), strict=True)) + rotated_mol.atom = [*zip(mol.elements, rotated_coords, strict=True)] rotated_mol.charge = mol.charge rotated_mol.spin = mol.spin rotated_mol.basis = mol.basis + rotated_mol.ecp = mol.ecp rotated_mol.build() - return rotated_mol - def fragments_read(frag_file): - """Loads fragement definition from a frag file. + """Load fragment definition from a file. 
Args: - frag_file (str): Name (including path) of the frag file to read. + frag_file (str): Path to the fragment file containing space-separated atom indices (1-based). Returns: - A list of arrays containing the fragments. + list: List of numpy arrays containing the fragment indices. """ with open(frag_file) as f: fragments = [np.fromstring(line, dtype=int, sep=' ')-1 for line in f] return fragments + def fragment_partitioning(fragments, prop_atom_inp, normalize=True): - """Computes the contribution of each fragment. + """Compute the contribution of each fragment. Args: - fragments (numpy ndarray): Fragment definition - prop_atom_inp (list of arrays or array): Coefficients densities. - normalize (bool): Normalized fragment partitioning. Defaults to True. + fragments (list): Fragment definition as list of numpy arrays. + prop_atom_inp (numpy.ndarray or list of numpy.ndarray): Atomic contributions to property(ies). + normalize (bool): Whether to normalize fragment partitioning. Defaults to True. Returns: - A list of arrays or an array containing the contribution of each fragment. + list or numpy.ndarray: Contribution of each fragment. Returns list if input was list, array otherwise. 
""" - - if type(prop_atom_inp) is list: - props_atom = prop_atom_inp - else: - props_atom = [prop_atom_inp] + props_atom = prop_atom_inp if type(prop_atom_inp) is list else [prop_atom_inp] props_frag = [] for prop_atom in props_atom: - prop_frag = np.zeros(len(fragments)) - for i, k in enumerate(fragments): - prop_frag[i] = prop_atom[k].sum() - prop_frag[i] = prop_atom[k].sum() + prop_frag = np.array([prop_atom[k].sum() for i, k in enumerate(fragments)]) + if normalize: + prop_frag *= 100.0 / prop_frag.sum() props_frag.append(prop_frag) - if normalize: - for i, prop_frag in enumerate(props_frag): - tot = prop_frag.sum() - props_frag[i] *= 100.0 / tot + return props_frag if type(prop_atom_inp) is list else props_frag[0] - if type(prop_atom_inp) is list: - return props_frag - else: - return props_frag[0] +def make_atom(q, basis, ecp=None): + """Create a single-atom molecule at the origin. + + Args: + q (str): Element symbol. + basis (str or dict): Basis set. + ecp (str): ECP to use. Defaults to None. -def make_atom(q, basis): + Returns: + pyscf.gto.Mole: Single-atom pyscf Mole object. + """ mol = gto.Mole() mol.atom = q + " 0.0 0.0 0.0" mol.charge = 0 mol.spin = data.elements.ELEMENTS_PROTON[q] % 2 mol.basis = basis + if ecp is not None: + mol.ecp = ecp mol.build() return mol + def singleatom_basis_enumerator(basis): - """Enumerates the different tensors of atomic orbitals within a 1-atom basis set - Each tensor is a $2l+2$-sized group of orbitals that share a radial function and $l$ value. - For each tensor, return the values of $l$, $n$ (an arbitrary radial-function counter that starts at 0), - as well as AO range + """Enumerate the different tensors of atomic orbitals within a 1-atom basis set. + + Each tensor is a 2l+1-sized group of orbitals that share a radial function and l value. + + Args: + basis (list): Basis set definition in pyscf format. 
+ + Returns: + tuple: A tuple containing: + - l_per_bas (list): Angular momentum quantum number l for each basis function. + - n_per_bas (list): Radial function counter n (starting at 0) for each basis function. + - ao_starts (list): Starting index in AO array for each basis function. """ ao_starts = [] l_per_bas = [] n_per_bas = [] - cursor = 0 + cursor = Cursor(action='ranger') cursor_per_l = [] for bas in basis: # shape of `bas`, l, then another optional constant, then lists [exp, coeff, coeff, coeff] @@ -327,16 +328,59 @@ def singleatom_basis_enumerator(basis): # and the number of primitive gaussians (one per list) l = bas[0] while len(cursor_per_l) <= l: - cursor_per_l.append(0) - + cursor_per_l.append(Cursor(action='ranger')) n_count = len(bas[-1])-1 - n_start = cursor_per_l[l] - cursor_per_l[l] += n_count - l_per_bas += [l] * n_count - n_per_bas.extend(range(n_start, n_start+n_count)) + n_per_bas.extend(cursor_per_l[l].add(n_count)) msize = 2*l+1 - ao_starts.extend(range(cursor, cursor+msize*n_count, msize)) - cursor += msize*n_count + ao_starts.extend(cursor.add(msize*n_count)[::msize]) return l_per_bas, n_per_bas, ao_starts + +def basis_flatten(mol, return_both=True, return_shells=False): + """Flatten a basis set definition for AOs. + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + return_both (bool): Whether to return both AO info and primitive Gaussian info. Defaults to True. + return_shells (bool): Whether to return angular momenta per shell. Defaults to False. 
+ + Returns: + - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: + - 0: atom index + - 1: angular momentum quantum number l + - 2: magnetic quantum number m + If return_both is True, also returns: + - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: + - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian + - j: AO index + - k: radial function index (padded with zeros if necessary) + If return_shells is True, also returns: + - numpy.ndarray: angular momentum quantum number for each shell + + """ + x = [] + L = [] + y = np.zeros((3, mol.nao), dtype=int) + i = Cursor(action='slicer') + a = mol.bas_exps() + for iat in range(mol.natm): + for bas_id in mol.atom_shell_ids(iat): + l = mol.bas_angular(bas_id) + n = mol.bas_nctr(bas_id) + cs = mol.bas_ctr_coeff(bas_id) + msize = 2*l+1 + if return_both: + for c in cs.T: + ac = np.array([a[bas_id], c]) + x.extend([ac]*msize) + y[:,i(msize*n)] = np.vstack((np.array([[iat, l]]*msize*n).T, [*get_mrange(l)]*n)) + if return_shells: + L.extend([l]*n) + + ret = [y] + if return_both: + ret.append(stack_padding(x).transpose((1,0,2))) + if return_shells: + ret.append(np.array(L)) + return ret[0] if len(ret)==1 else ret diff --git a/qstack/constants.py b/qstack/constants.py index 74b0c6e4..c9085d51 100644 --- a/qstack/constants.py +++ b/qstack/constants.py @@ -1,14 +1,15 @@ -''' -NIST physical constants and unit conversion +"""NIST physical constants and unit conversion. 
https://physics.nist.gov/cuu/Constants/ https://physics.nist.gov/cuu/Constants/Table/allascii.txt -''' +""" +import math + # Constants SPEED_LIGHT = 299792458.0 PLANCK = 6.62607004e-34 -HBAR = PLANCK/(2*3.141592653589793) +HBAR = PLANCK/(2*math.pi) FUND_CHARGE = 1.6021766208e-19 MOL_NA = 6.022140857e23 MASS_E = 9.10938356e-31 @@ -22,4 +23,4 @@ BOHR2ANGS = 0.52917721092 # Angstroms HARTREE2J = HBAR**2/(MASS_E*(BOHR2ANGS*1e-10)**2) HARTREE2EV = 27.21138602 -AU2DEBYE = FUND_CHARGE * BOHR2ANGS*1e-10 / DEBYE # 2.541746 +AU2DEBYE = FUND_CHARGE * BOHR2ANGS*1e-10 / DEBYE # 2.541746 diff --git a/qstack/equio.py b/qstack/equio.py index d66fe674..35d2adf2 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -1,19 +1,25 @@ +"""Equilibrium geometry and molecular structure I/O utilities.""" + +import itertools from functools import reduce -import numpy as np from types import SimpleNamespace +import numpy as np from pyscf import data import metatensor -import numbers +from qstack.tools import Cursor +from qstack.reorder import get_mrange, pyscf2gpr_l1_order +from qstack.compound import singleatom_basis_enumerator + vector_label_names = SimpleNamespace( - tm = ['spherical_harmonics_l', 'species_center'], + tm = ['o3_lambda', 'center_type'], block_prop = ['radial_channel'], block_samp = ['atom_id'], block_comp = ['spherical_harmonics_m'], ) matrix_label_names = SimpleNamespace( - tm = ['spherical_harmonics_l1', 'spherical_harmonics_l2', 'species_center1', 'species_center2'], + tm = ['o3_lambda1', 'o3_lambda2', 'center_type1', 'center_type2'], block_prop = ['radial_channel1', 'radial_channel2'], block_samp = ['atom_id1', 'atom_id2'], block_comp = ['spherical_harmonics_m1', 'spherical_harmonics_m2'], @@ -21,77 +27,58 @@ _molid_name = 'mol_id' -_pyscf2gpr_l1_order = [1,2,0] - - -def _get_mrange(l): - # for l=1, the pyscf order is x,y,z (1,-1,0) - if l==1: - return (1,-1,0) - else: - return range(-l,l+1) +def _get_llist(mol): + """Get list of angular momentum quantum numbers for 
basis functions of each element of a molecule. -def _get_llist(q, mol): - """ Args: - q (int): Atomic number. - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. Returns: - A list + dict: Dictionary with atom numbers as keys and List of angular momentum quantum numbers for each basis function as values. """ - - # TODO other basis formats? -# for bas_id in mol.atom_shell_ids(iat): -# l = mol.bas_angular(bas_id) -# nc = mol.bas_nctr(bas_id) -# for n in range(nc): - if isinstance(q, numbers.Integral): - q = data.elements.ELEMENTS[q] - llist = [] - for l, *prim in mol._basis[q]: - llist.extend([l]*(len(prim[0])-1)) - return llist + return {int(q): singleatom_basis_enumerator(mol._basis[data.elements.ELEMENTS[q]])[0] for q in np.unique(mol.atom_charges())} def _get_tsize(tensor): - """Computes the size of a tensor. + """Compute the size of a tensor. Args: - tensor (metatensor TensorMap): Tensor. + tensor (metatensor.TensorMap): Tensor. Returns: - The size of the tensor as an integer. + int: Total size of the tensor (total number of elements). """ - return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys]) + return sum(np.prod(tensor.block(key).values.shape) for key in tensor.keys) + def _labels_to_array(labels): - """Represents a set of metatensor labels as an array of the labels, using custom dtypes + """Represent a set of metatensor labels as an array. Args: - labels (metatensor Labels): Labels + labels (metatensor.Labels): Labels object. Returns: - labels (numpy ndarray[ndim=1, structured dtype]): the same labels + numpy.ndarray: 1D structured array containing the same labels. """ values = labels.values - dtype = [ (name,values.dtype) for name in labels.names] + dtype = [(name, values.dtype) for name in labels.names] return values.view(dtype=dtype).reshape(values.shape[0]) + def vector_to_tensormap(mol, c): - """Transform a vector into a tensor map. Used by :py:func:`array_to_tensormap`. 
+ """Transform a vector into a tensor map. + + Each element of the vector corresponds to an atomic orbital of the molecule. Args: - mol (pyscf Mole): pyscf Mole object. - v (numpy ndarray): Vector. + mol (pyscf.gto.Mole): pyscf Mole object. + c (numpy.ndarray): vector to transform. Returns: - A metatensor tensor map. + metatensor.TensorMap: Tensor map representation of the vector. """ - - atom_charges = list(mol.atom_charges()) - elements = sorted(set(atom_charges)) + atom_charges = mol.atom_charges() tm_label_vals = [] block_prop_label_vals = {} @@ -103,19 +90,20 @@ def vector_to_tensormap(mol, c): # Create labels for TensorMap, lables for blocks, and empty blocks - for q in elements: - llist = _get_llist(q, mol) - llists[q] = llist + llists = _get_llist(mol) + + for q, samples_count in zip(*np.unique(atom_charges, return_counts=True), strict=True): + llist = llists[q] + block_samp_label_vals_q = np.where(atom_charges==q)[0].reshape(-1,1) for l in sorted(set(llist)): label = (l, q) tm_label_vals.append(label) - samples_count = atom_charges.count(q) components_count = 2*l+1 properties_count = llist.count(l) blocks[label] = np.zeros((samples_count, components_count, properties_count)) block_comp_label_vals[label] = np.arange(-l, l+1).reshape(-1,1) block_prop_label_vals[label] = np.arange(properties_count).reshape(-1,1) - block_samp_label_vals[label] = np.where(atom_charges==q)[0].reshape(-1,1) + block_samp_label_vals[label] = block_samp_label_vals_q tm_labels = metatensor.Labels(vector_label_names.tm, np.array(tm_label_vals)) @@ -125,27 +113,25 @@ def vector_to_tensormap(mol, c): # Fill in the blocks - iq = dict.fromkeys(elements, 0) - i = 0 + iq = dict.fromkeys(llists.keys(), 0) + i = Cursor(action='slicer') for q in atom_charges: if llists[q]==sorted(llists[q]): for l in set(llists[q]): msize = 2*l+1 - nsize = blocks[(l,q)].shape[-1] - cslice = c[i:i+nsize*msize].reshape(nsize,msize).T + nsize = blocks[l,q].shape[-1] + cslice = 
c[i(nsize*msize)].reshape(nsize,msize).T if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) - cslice = cslice[_pyscf2gpr_l1_order] - blocks[(l,q)][iq[q],:,:] = cslice - i += msize*nsize + cslice = cslice[pyscf2gpr_l1_order] + blocks[l,q][iq[q],:,:] = cslice else: il = dict.fromkeys(range(max(llists[q]) + 1), 0) for l in llists[q]: msize = 2*l+1 - cslice = c[i:i+msize] + cslice = c[i(msize)] if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) - cslice = cslice[_pyscf2gpr_l1_order] - blocks[(l,q)][iq[q],:,il[l]] = cslice - i += msize + cslice = cslice[pyscf2gpr_l1_order] + blocks[l,q][iq[q],:,il[l]] = cslice il[l] += 1 iq[q] += 1 @@ -158,31 +144,34 @@ def vector_to_tensormap(mol, c): def tensormap_to_vector(mol, tensor): - """Transform a tensor map into a vector. :py:func:`Used by tensormap_to_array`. + """Transform a tensor map into a vector. Args: - mol (pyscf Mole): pyscf Mole object. - tensor (metatensor TensorMap): Tensor. + mol (pyscf.gto.Mole): pyscf Mole object. + tensor (metatensor.TensorMap): tensor to transform. Returns: - A numpy ndarray (vector). - """ + numpy.ndarray: 1D array (vector) representation. + Raises: + RuntimeError: If tensor size does not match mol.nao. 
+ """ nao = _get_tsize(tensor) if mol.nao != nao: raise RuntimeError(f'Tensor size mismatch ({nao} instead of {mol.nao})') c = np.zeros(mol.nao) atom_charges = mol.atom_charges() + llists = _get_llist(mol) i = 0 for iat, q in enumerate(atom_charges): - llist = _get_llist(q, mol) + llist = llists[q] il = dict.fromkeys(range(max(llist) + 1), 0) for l in llist: - block = tensor.block(spherical_harmonics_l=l, species_center=q) + block = tensor.block(o3_lambda=l, center_type=q) id_samp = block.samples.position((iat,)) id_prop = block.properties.position((il[l],)) - for m in _get_mrange(l): + for m in get_mrange(l): id_comp = block.components[0].position((m,)) c[i] = block.values[id_samp,id_comp,id_prop] i += 1 @@ -191,53 +180,51 @@ def tensormap_to_vector(mol, tensor): def matrix_to_tensormap(mol, dm): - """ Transform a matrix into a tensor map. Used by :py:func:`array_to_tensormap`. + """Transform a matrix into a tensor map. + + Each element of the matrix corresponds to a pair of atomic orbitals. Args: - mol (pyscf Mole): pyscf Mole object. - v (numpy ndarray): Matrix. + mol (pyscf.gto.Mole): pyscf Mole object. + dm (numpy.ndarray): matrix to transform. Returns: - A metatensor tensor map. + metatensor.TensorMap: Tensor map representation of the matrix. 
""" - - def pairs(list1, list2): - return np.array([(i,j) for i in list1 for j in list2]) - - atom_charges = list(mol.atom_charges()) - elements = sorted(set(atom_charges)) + atom_charges = mol.atom_charges() + elements, counts = np.unique(atom_charges, return_counts=True) + counts = dict(zip(elements, counts, strict=True)) + element_indices = {q: np.where(atom_charges==q)[0] for q in elements} + llists = _get_llist(mol) tm_label_vals = [] block_prop_label_vals = {} block_samp_label_vals = {} block_comp_label_vals = {} - blocks = {} - llists = {q: _get_llist(q, mol) for q in elements} # Create labels for TensorMap, lables for blocks, and empty blocks for q1 in elements: for q2 in elements: + samples_count1 = counts[q1] + samples_count2 = counts[q2] llist1 = llists[q1] llist2 = llists[q2] + block_samp_label_vals_q1q2 = np.array([*itertools.product(element_indices[q1], element_indices[q2])]) for l1 in sorted(set(llist1)): + components_count1 = 2*l1+1 + properties_count1 = llist1.count(l1) for l2 in sorted(set(llist2)): - label = (l1, l2, q1, q2) - tm_label_vals.append(label) - - samples_count1 = atom_charges.count(q1) - components_count1 = 2*l1+1 - properties_count1 = llist1.count(l1) - - samples_count2 = atom_charges.count(q2) components_count2 = 2*l2+1 properties_count2 = llist2.count(l2) + label = (l1, l2, q1, q2) + tm_label_vals.append(label) blocks[label] = np.zeros((samples_count1*samples_count2, components_count1, components_count2, properties_count1*properties_count2)) block_comp_label_vals[label] = (np.arange(-l1, l1+1).reshape(-1,1), np.arange(-l2, l2+1).reshape(-1,1)) - block_prop_label_vals[label] = pairs(np.arange(properties_count1), np.arange(properties_count2)) - block_samp_label_vals[label] = pairs(np.where(atom_charges==q1)[0],np.where(atom_charges==q2)[0]) + block_prop_label_vals[label] = np.array([*itertools.product(np.arange(properties_count1), np.arange(properties_count2))]) + block_samp_label_vals[label] = block_samp_label_vals_q1q2 tm_labels = 
metatensor.Labels(matrix_label_names.tm, np.array(tm_label_vals)) @@ -252,48 +239,44 @@ def pairs(list1, list2): if all(llists[q]==sorted(llists[q]) for q in llists): iq1 = dict.fromkeys(elements, 0) - i1 = 0 + i1 = Cursor(action='slicer') for iat1, q1 in enumerate(atom_charges): for l1 in set(llists[q1]): msize1 = 2*l1+1 nsize1 = llists[q1].count(l1) iq2 = dict.fromkeys(elements, 0) - i2 = 0 + i1.add(nsize1*msize1) + i2 = Cursor(action='slicer') for iat2, q2 in enumerate(atom_charges): for l2 in set(llists[q2]): msize2 = 2*l2+1 nsize2 = llists[q2].count(l2) - dmslice = dm[i1:i1+nsize1*msize1,i2:i2+nsize2*msize2].reshape(nsize1,msize1,nsize2,msize2) + dmslice = dm[i1(),i2(nsize2*msize2)].reshape(nsize1,msize1,nsize2,msize2) dmslice = np.transpose(dmslice, axes=[1,3,0,2]).reshape(msize1,msize2,-1) block = tensor_blocks[tm_label_vals.index((l1,l2,q1,q2))] at_p = block.samples.position((iat1,iat2)) - blocks[(l1,l2,q1,q2)][at_p,:,:,:] = dmslice - i2 += msize2*nsize2 + blocks[l1,l2,q1,q2][at_p,:,:,:] = dmslice iq2[q2] += 1 - i1 += msize1*nsize1 iq1[q1] += 1 else: iq1 = dict.fromkeys(elements, 0) - i1 = 0 + i1 = Cursor(action='slicer') for iat1, q1 in enumerate(atom_charges): il1 = dict.fromkeys(range(max(llists[q1]) + 1), 0) for l1 in llists[q1]: - msize1 = 2*l1+1 + i1.add(2*l1+1) iq2 = dict.fromkeys(elements, 0) - i2 = 0 + i2 = Cursor(action='slicer') for iat2, q2 in enumerate(atom_charges): il2 = dict.fromkeys(range(max(llists[q2]) + 1), 0) for l2 in llists[q2]: - msize2 = 2*l2+1 - dmslice = dm[i1:i1+msize1,i2:i2+msize2] + dmslice = dm[i1(),i2(2*l2+1)] block = tensor_blocks[tm_label_vals.index((l1, l2, q1, q2))] at_p = block.samples.position((iat1, iat2)) n_p = block.properties.position((il1[l1], il2[l2])) - blocks[(l1,l2,q1,q2)][at_p,:,:,n_p] = dmslice - i2 += msize2 + blocks[l1,l2,q1,q2][at_p,:,:,n_p] = dmslice il2[l2] += 1 iq2[q2] += 1 - i1 += msize1 il1[l1] += 1 iq1[q1] += 1 @@ -301,9 +284,9 @@ def pairs(list1, list2): for key in blocks: l1,l2 = key[:2] if l1==1: 
- blocks[key] = np.ascontiguousarray(blocks[key][:,_pyscf2gpr_l1_order,:,:]) + blocks[key] = np.ascontiguousarray(blocks[key][:,pyscf2gpr_l1_order,:,:]) if l2==1: - blocks[key] = np.ascontiguousarray(blocks[key][:,:,_pyscf2gpr_l1_order,:]) + blocks[key] = np.ascontiguousarray(blocks[key][:,:,pyscf2gpr_l1_order,:]) # Build tensor map tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals] @@ -313,40 +296,40 @@ def pairs(list1, list2): def tensormap_to_matrix(mol, tensor): - """Transform a tensor map into a matrix. Used by :py:func:`tensormap_to_array`. + """Transform a tensor map into a matrix. Args: - mol (pyscf Mole): pyscf Mole object. - tensor (metatensor TensorMap): Tensor. + mol (pyscf.gto.Mole): pyscf Mole object. + tensor (metatensor.TensorMap): tensor to transform. Returns: - A numpy ndarray (matrix). - """ + numpy.ndarray: 2D array (matrix) representation. + Raises: + RuntimeError: If tensor size does not match mol.nao * mol.nao. 
+ """ nao2 = _get_tsize(tensor) if mol.nao*mol.nao != nao2: raise RuntimeError(f'Tensor size mismatch ({nao2} instead of {mol.nao*mol.nao})') dm = np.zeros((mol.nao, mol.nao)) atom_charges = mol.atom_charges() + llists = _get_llist(mol) i1 = 0 for iat1, q1 in enumerate(atom_charges): - llist1 = _get_llist(q1, mol) + llist1 = llists[q1] il1 = dict.fromkeys(range(max(llist1) + 1), 0) for l1 in llist1: - for m1 in _get_mrange(l1): - + for m1 in get_mrange(l1): i2 = 0 for iat2, q2 in enumerate(atom_charges): - llist2 = _get_llist(q2, mol) + llist2 = llists[q2] il2 = dict.fromkeys(range(max(llist2) + 1), 0) for l2 in llist2: - - block = tensor.block(spherical_harmonics_l1=l1, spherical_harmonics_l2=l2, species_center1=q1, species_center2=q2) + block = tensor.block(o3_lambda1=l1, o3_lambda2=l2, center_type1=q1, center_type2=q2) id_samp = block.samples.position((iat1, iat2)) id_prop = block.properties.position((il1[l1], il2[l2])) - - for m2 in _get_mrange(l2): + for m2 in get_mrange(l2): id_comp1 = block.components[0].position((m1,)) id_comp2 = block.components[1].position((m2,)) dm[i1, i2] = block.values[id_samp, id_comp1, id_comp2, id_prop] @@ -354,18 +337,23 @@ def tensormap_to_matrix(mol, tensor): il2[l2] += 1 i1 += 1 il1[l1] += 1 - return dm + def array_to_tensormap(mol, v): - """ Transform an array into a tensor map. + """Transform an array into a tensor map. + + Wrapper for vector_to_tensormap and matrix_to_tensormap. Args: - mol (pyscf Mole): pyscf Mole object. - v (numpy ndarray): Array. It can be a vector or a matrix. + mol (pyscf.gto.Mole): pyscf Mole object. + v (numpy.ndarray): Array to transform. Can be a vector (1D) or matrix (2D). Returns: - A metatensor tensor map. + metatensor.TensorMap: Tensor map representation of the array. + + Raises: + ValueError: If array dimension is not 1 or 2. 
""" if v.ndim==1: return vector_to_tensormap(mol, v) @@ -378,14 +366,18 @@ def array_to_tensormap(mol, v): def tensormap_to_array(mol, tensor): """Transform a tensor map into an array. + Wrapper for tensormap_to_vector and tensormap_to_matrix. + Args: - mol (pyscf Mole): pyscf Mole object. - tensor (metatensor TensorMap): Tensor. + mol (pyscf.gto.Mole): pyscf Mole object. + tensor (metatensor.TensorMap): Tensor to transform. Returns: - A numpy ndarray. Matrix or vector, depending on the key names of the tensor. - """ + numpy.ndarray: Array representation (1D vector or 2D matrix). + Raises: + RuntimeError: If tensor key names don't match expected format. + """ if tensor.keys.names==vector_label_names.tm: return tensormap_to_vector(mol, tensor) elif tensor.keys.names==matrix_label_names.tm: @@ -395,15 +387,17 @@ def tensormap_to_array(mol, tensor): def join(tensors): - """Merge two or more tensors with the same label names avoiding information duplictaion. + """Merge two or more tensors with the same label names avoiding information duplication. Args: - tensors (list): List of metatensor TensorMap. + tensors (list): List of metatensor.TensorMap objects. Returns: - A metatensor TensorMap containing the information of all the input tensors. - """ + metatensor.TensorMap: Merged tensor containing information from all input tensors. + Raises: + RuntimeError: If tensors have different label names. + """ if not all(tensor.keys.names==tensors[0].keys.names for tensor in tensors): raise RuntimeError('Cannot merge tensors with different label names') tm_label_vals = set().union(*[set(_labels_to_array(tensor.keys)) for tensor in tensors]) @@ -446,12 +440,15 @@ def split(tensor): """Split a tensor based on the molecule information stored within the input TensorMap. Args: - tensor (metatensor TensorMap): Tensor containing several molecules. + tensor (metatensor.TensorMap): Tensor containing several molecules. 
Returns: - N metatensor TensorMap, where N is equal to the total number of diferent molecules stored within the input TensorMap. - """ + list or dict: Collection of metatensor.TensorMap objects, one per molecule. + Returns list if molecule indices are continuous, dict otherwise. + Raises: + RuntimeError: If tensor does not contain multiple molecules. + """ if tensor.sample_names[0]!=_molid_name: raise RuntimeError('Tensor does not seem to contain several molecules') @@ -488,7 +485,7 @@ def split(tensor): continue sampleidx = [t[0] for t in samples] samplelbl = [t[1] for t in samples] - #sampleidx = [block.samples.position(lbl) for lbl in samplelbl] + # sampleidx = [block.samples.position(lbl) for lbl in samplelbl] blocks[key] = block.values[sampleidx] block_samp_labels[key] = metatensor.Labels(tensor.sample_names[1:], np.array(samplelbl)[:,1:]) diff --git a/qstack/fields/__init__.py b/qstack/fields/__init__.py index 78c8a246..4ae39111 100644 --- a/qstack/fields/__init__.py +++ b/qstack/fields/__init__.py @@ -1,3 +1,7 @@ +"""Molecular field analysis module. + +Provides tools for density decomposition, molecular fields, and property computation.""" + from qstack.fields import decomposition from qstack.fields import dm from qstack.fields import hf_otpd diff --git a/qstack/fields/decomposition.py b/qstack/fields/decomposition.py index b9f2b085..91ba4b2a 100644 --- a/qstack/fields/decomposition.py +++ b/qstack/fields/decomposition.py @@ -1,7 +1,11 @@ +"""Density matrix decomposition onto auxiliary basis sets.""" + import numpy as np import scipy from pyscf import scf from qstack import compound +from . import moments + def decompose(mol, dm, auxbasis): """Fit molecular density onto an atom-centered basis. @@ -12,76 +16,114 @@ def decompose(mol, dm, auxbasis): auxbasis (string / pyscf basis dictionary): Atom-centered basis to decompose on. 
Returns: - A copy of the pyscf Mole object with the auxbasis basis in a pyscf Mole object, and a 1D numpy array containing the decomposition coefficients. + Tuple containing: + - copy of the pyscf Mole object with the auxbasis basis in a pyscf Mole object, + - 1D numpy array containing the decomposition coefficients. """ - auxmol = compound.make_auxmol(mol, auxbasis) _S, eri2c, eri3c = get_integrals(mol, auxmol) c = get_coeff(dm, eri2c, eri3c) return auxmol, c + def get_integrals(mol, auxmol): - """Computes overlap and 2-/3-centers ERI matrices. + """Compute overlap integrals and 2-/3-centers ERI matrices. Args: mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. - auxmol (pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set. + auxmol (pyscf Mole): pyscf Mole object of the same molecule with an auxiliary basis set. Returns: - Three numpy ndarray containing: the overlap matrix, the 2-centers ERI matrix, and the 3-centers ERI matrix respectively. + Tuple of three numpy ndarray containing: + - overlap matrix (auxmol.nao,auxmol.nao) for the auxiliary basis, + - 2-centers ERI matrix (auxmol.nao,auxmol.nao) for the auxiliary basis, + - 3-centers ERI matrix (mol.nao,mol.nao,auxmol.nao) between AO and auxiliary basis. """ - - # Get overlap integral in the auxiliary basis S = auxmol.intor('int1e_ovlp_sph') - - # Concatenate standard and auxiliary basis set into a pmol object pmol = mol + auxmol - - # Compute 2- and 3-centers ERI integrals using the concatenated mol object eri2c = auxmol.intor('int2c2e_sph') eri3c = pmol.intor('int3c2e_sph', shls_slice=(0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas)) eri3c = eri3c.reshape(mol.nao_nr(), mol.nao_nr(), -1) - return S, eri2c, eri3c -def get_self_repulsion(mol, dm): - """Computes the Einstein summation of the Coulumb matrix and the density matrix. + +def get_self_repulsion(mol_or_mf, dm): + r"""Compute the self-repulsion of the density. 
+ + \int \int \rho_DM(r1) 1/|r1-r2| \rho_DM(r2) dr1 dr2 Args: - mol (pyscf Mole): pyscf Mole object. - dm (numpy ndarray): Density matrix. + mol_or_mf (pyscf Mole or SCF): pyscf Mole or Mean Field object. + dm (2D numpy ndarray): Density matrix. Returns: - A nummpy ndarray result of the Einstein summation of the J matrix and the Density matrix. + float: Self-repulsion energy (a.u.). """ - try: - j, _k = mol.get_jk() + j, _k = mol_or_mf.get_jk() except AttributeError: - j, _k = scf.hf.get_jk(mol, dm) + j, _k = scf.hf.get_jk(mol_or_mf, dm) return np.einsum('ij,ij', j, dm) -def decomposition_error(self_repulsion, c, eri2c): - """Computes the decomposition error. - .. todo:: - Write the complete docstring +def optimal_decomposition_error(self_repulsion, c, eri2c): + r"""Compute the decomposition error for optimal density fitting. + + \int \int [\rho_DM(r1) - \rho_DF(r1)] 1/|r1-r2| [\rho_DM(r2) - \rho_DF(r2)] dr1 dr2 + + Args: + self_repulsion (float): Self-repulsion energy from the original density matrix. + c (numpy ndarray): 1D array of density expansion coefficients. + eri2c (numpy ndarray): 2D array of 2-center electron repulsion integrals. + + Returns: + float: The decomposition error. + + Notes: + - It is assumed that `c` are the optimal coefficients obtained from the density matrix. + - `self_repulsion` can be set to 0 to avoid expensive computations when only the relative error is needed. """ return self_repulsion - c @ eri2c @ c + +def decomposition_error(self_repulsion, c, eri2c, eri3c, dm): + r"""Compute the decomposition error for arbitrary density-fitting coefficients. + + \int \int [\rho_DM(r1) - \rho_DF(r1)] 1/|r1-r2| [\rho_DM(r2) - \rho_DF(r2)] dr1 dr2 + + Args: + self_repulsion (float): Self-repulsion energy from the original density matrix. + c (numpy ndarray): 1D array of density expansion coefficients. + eri2c (numpy ndarray): 2D array of 2-center ERIs. + eri3c (numpy ndarray): 3D array of 3-center ERIs. + dm (numpy ndarray): Density matrix. + + Returns: + float: The decomposition error. 
+ + Notes: + - If `c` are the optimal coefficients obtained from the density matrix, `optimal_decomposition_error()` can be used instead. + - `self_repulsion` can be set to 0 to avoid expensive computations when only the relative error is needed. + """ + projection = np.einsum('ijp,ij->p', eri3c, dm) + return self_repulsion + c @ eri2c @ c - 2.0 * c @ projection + + def get_coeff(dm, eri2c, eri3c, slices=None): - """Computes the density expansion coefficients. + """Compute the density expansion coefficients. Args: dm (numpy ndarray): Density matrix. eri2c (numpy ndarray): 2-centers ERI matrix. eri3c (numpy ndarray): 3-centers ERI matrix. - slices (optional numpy ndarray): assume that eri2c is bloc-diagonal, by giving the boundaries of said blocks + slices (optional numpy ndarray): Assume that eri2c is block-diagonal, by giving the boundaries of said blocks. Returns: A numpy ndarray containing the expansion coefficients of the density onto the auxiliary basis. - """ + Raises: + RuntimeError: If the `slices` argument is incorrectly formatted or inconsistent with the auxiliary basis size. + """ # Compute the projection of the density onto auxiliary basis using a Coulomb metric projection = np.einsum('ijp,ij->p', eri3c, dm) @@ -99,43 +141,47 @@ def get_coeff(dm, eri2c, eri3c, slices=None): return c + def _get_inv_metric(mol, metric, v): - """ - - Args: - mol (pyscf Mole): pyscf Mole object. - metric (str): unit, overlap or coulomb. - v (numpy ndarray): Number of electrons decomposed into a vector. - """ - if isinstance(metric, str): - metric = metric.lower() - if metric in ['u', 'unit', '1']: - return v - elif metric in ['s', 'overlap', 'ovlp']: - O = mol.intor('int1e_ovlp_sph') - elif metric in ['j', 'coulomb']: - O = mol.intor('int2c2e_sph') - else: - O = metric - return scipy.linalg.solve(O, v, assume_a='pos') + """Compute the inverse metric applied to a vector. + Args: + mol (pyscf Mole): pyscf Mole object. 
+ metric (str or numpy ndarray): Metric type ('unit', 'overlap', 'coulomb') or a metric matrix. + v (numpy ndarray): Vector to apply the inverse metric to. -def correct_N_atomic(mol, N, c0, metric='u'): + Returns: + numpy ndarray: Result of applying the inverse metric to the input vector. """ + if isinstance(metric, str): + metric = metric.lower() + if metric in ['u', 'unit', '1']: + return v + elif metric in ['s', 'overlap', 'ovlp']: + O = mol.intor('int1e_ovlp_sph') + elif metric in ['j', 'coulomb']: + O = mol.intor('int2c2e_sph') + else: + O = metric + return scipy.linalg.solve(O, v, assume_a='pos') + + +def correct_N_atomic(mol, N, c0, metric='u'): + """Corrects decomposition coefficients to match the target number of electrons per atom. + + Uses Lagrange multipliers to enforce the correct number of electrons per atom + while minimizing changes to the decomposition coefficients. Args: - mol (pyscf Mole): pyscf Mole objec used for the computation of the density matrix. - N (int): Number of electrons. Defaults to None. - c0 (1D numpy array): Decomposition coefficients. - metric (str): .Defaults to 'u'. + mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. + N (numpy ndarray): Target number of electrons per atom. + c0 (numpy ndarray): 1D array of initial decomposition coefficients. + metric (str): Metric type for correction ('u' for unit, 's' for overlap, 'j' for coulomb). Defaults to 'u'. Returns: - - .. todo:: - Write the complete docstring. + numpy ndarray: Corrected decomposition coefficients (1D array). 
""" - - Q = number_of_electrons_deco_vec(mol, per_atom=True) + Q = moments.r2_c(mol, None, moments=[0], per_atom=True)[0] N0 = c0 @ Q O1q = _get_inv_metric(mol, metric, Q) la = scipy.linalg.solve(Q.T @ O1q, N-N0) @@ -144,24 +190,20 @@ def correct_N_atomic(mol, N, c0, metric='u'): def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): - """ + """Corrects decomposition coefficients to match the target total number of electrons. Args: - mol (pyscf Mole): pyscf Mole objec used for the computation of the density matrix. - c0 (1D numpy array): Decomposition coefficients. - N (int): Number of electrons. Defaults to None. - mode (str): Defaults to Lagrange. - metric (str): Defaults to u. + mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. + c0 (numpy ndarray): 1D array of initial decomposition coefficients. + N (int, optional): Target number of electrons. If None, uses mol.nelectron. Defaults to None. + mode (str): Correction method ('scale' or 'Lagrange'). Defaults to 'Lagrange'. + metric (str): Metric type for Lagrange correction ('u', 's', or 'j'). Defaults to 'u'. Returns: - A numpy ndarray containing a set of expansion coefficients taking into account the correct total number of electrons. - - .. todo:: - Write the complete docstring. + numpy ndarray: Corrected decomposition coefficients (1D array). """ - mode = mode.lower() - q = number_of_electrons_deco_vec(mol) + q = moments.r2_c(mol, None, moments=[0]) N0 = c0 @ q if N is None: @@ -175,44 +217,3 @@ def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): la = (N - N0) / (q @ O1q) c = c0 + la * O1q return c - - -def number_of_electrons_deco_vec(mol, per_atom=False): - """ - - .. todo:: - Write the complete docstring. 
- """ - if per_atom: - Q = np.zeros((mol.nao,mol.natm)) - else: - Q = np.zeros(mol.nao) - i = 0 - for iat in range(mol.natm): - for bas_id in mol.atom_shell_ids(iat): - l = mol.bas_angular(bas_id) - n = mol.bas_nctr(bas_id) - if l==0: - w = mol.bas_ctr_coeff(bas_id) - a = mol.bas_exp(bas_id) - q = np.pow(2.0*np.pi/a, 0.75) @ w - if per_atom: - Q[i:i+n,iat] = q - else: - Q[i:i+n] = q - i += (2*l+1)*n - return Q - -def number_of_electrons_deco(auxmol, c): - """Computes the number of electrons of a molecule given a set of expansion coefficients and a Mole object. - - Args: - auxmol (pyscf Mole): pyscf mol object holding molecular structure, composition and the auxiliary basis set. - c (numpy ndarray): expansion coefficients of the density onto the auxiliary basis. - - Returns: - The number of electrons as an integer value. - """ - - q = number_of_electrons_deco_vec(auxmol) - return q @ c diff --git a/qstack/fields/density2file.py b/qstack/fields/density2file.py index 585114c0..686f70a1 100644 --- a/qstack/fields/density2file.py +++ b/qstack/fields/density2file.py @@ -1,48 +1,48 @@ +"""Density field output to file formats (cube, etc.).""" + import numpy as np from pyscf.dft.numint import eval_ao from pyscf.tools.cubegen import Cube import pyscf.tools.molden -from .decomposition import number_of_electrons_deco +from . import moments -def coeffs_to_cube(mol, coeffs, cubename, nx = 80, ny = 80, nz = 80, resolution = 0.1, margin = 3.0): - """Saves the density in a cube file. - Args: - mol (pyscf Mole): pyscf Mole. - coeffs (numpy ndarray): Expansion coefficients. - cubename (str): Name of the cubo file. +def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, margin=3.0): + """Save the electron density to a cube file. - Returns: - A new or overwrited file named .cube + Args: + mol (pyscf Mole): pyscf Mole object. + coeffs (numpy ndarray): 1D array of density expansion coefficients. + cubename (str): Output filename (without .cube extension). 
+ nx (int): Number of grid points in x direction. Defaults to 80. + ny (int): Number of grid points in y direction. Defaults to 80. + nz (int): Number of grid points in z direction. Defaults to 80. + resolution (float): Grid spacing in Bohr. Defaults to 0.1. + margin (float): Extra space around molecule in Bohr. Defaults to 3.0. + + Output: + Creates a file named .cube on disk. """ - - # Make grid grid = Cube(mol, nx, ny, nz, resolution, margin) - - # Compute density on the .cube grid coords = grid.get_coords() - ao = eval_ao(mol, coords) orb_on_grid = np.dot(ao, coeffs) - orb_on_grid = orb_on_grid.reshape(grid.nx,grid.ny,grid.nz) - - # Write out orbital to the .cube file + orb_on_grid = orb_on_grid.reshape(grid.nx, grid.ny, grid.nz) grid.write(orb_on_grid, cubename, comment='Electron Density') def coeffs_to_molden(mol, coeffs, moldenname): - """Saves the density in a molden file. + """Save the electron density to a MOLDEN file. Args: - mol (pyscf Mole): pyscf Mole. - coeffs (numpy ndarray): Expansion coefficients. - moldenname (str): File name of the molden file. + mol (pyscf Mole): pyscf Mole object. + coeffs (numpy ndarray): 1D array of density expansion coefficients. + moldenname (str): Output filename for the MOLDEN file. - Returns: - A new or overwrited file named .molden + Output: + Creates a file named .molden on disk. 
""" - with open(moldenname, 'w') as f: - N = number_of_electrons_deco(mol, coeffs) + N = moments.r2_c(mol, coeffs, moments=[0])[0] pyscf.tools.molden.header(mol, f, True) pyscf.tools.molden.orbital_coeff(mol, f, np.array([coeffs]).T, ene=[0.0], occ=[N], ignore_h=True) diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index 11d27564..90651f13 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -1,19 +1,25 @@ +"""Density matrix manipulation and analysis functions.""" + +import numpy as np from pyscf import dft from qstack import constants -import numpy as np +from qstack.tools import Cursor -def get_converged_dm(mol, xc, verbose=False): - """Performs restricted SCF and returns density matrix, given pyscf mol object and an XC density functional. + +def get_converged_mf(mol, xc, dm0=None, verbose=False): + """Perform SCF calculation. Args: mol (pyscf Mole): pyscf Mole object. xc (str): Exchange-correlation functional. - verbose (bool): If print more info + dm0 (numpy ndarray, optional): Initial guess for density matrix. Defaults to None. + verbose (bool): If print more information. Returns: - A numpy ndarray containing the density matrix in AO-basis. + tuple: A tuple containing: + - mf (pyscf.dft.rks.RKS or pyscf.dft.uks.UKS): Converged mean-field object. + - dm (numpy ndarray): Converged density matrix in AO-basis. 
""" - if mol.multiplicity == 1: mf = dft.RKS(mol) else: @@ -21,21 +27,34 @@ def get_converged_dm(mol, xc, verbose=False): mf.xc = xc if verbose: - print("Starting Kohn-Sham computation at "+str(mf.xc)+"/"+str(mol.basis)+" level.") + print(f"Starting Kohn-Sham computation at {mf.xc}/{mol.basis} level.") mf.verbose = 1 - mf.kernel() + mf.kernel(dm0=dm0) if verbose: - print("Convergence: ",mf.converged) - print("Energy: ",mf.e_tot) + print(f"Convergence: {mf.converged}") + print(f"Energy: {mf.e_tot}") - # Make the one-particle density matrix in ao-basis dm = mf.make_rdm1() + return (mf, dm) - return dm -def make_grid_for_rho(mol, grid_level = 3): - """Generates a grid of real space coordinates and weights for integration. +def get_converged_dm(mol, xc, verbose=False): + """Get a converged density matrix. + + Args: + mol (pyscf Mole): pyscf Mole object. + xc (str): Exchange-correlation functional. + verbose (bool): If print more information. + + Returns: + A numpy ndarray containing the density matrix in AO-basis. + """ + return get_converged_mf(mol, xc, dm0=None, verbose=verbose)[1] + + +def make_grid_for_rho(mol, grid_level=3): + """Generate a grid of real space coordinates and weights for integration. Args: mol (pyscf Mole): pyscf Mole object. @@ -44,13 +63,12 @@ def make_grid_for_rho(mol, grid_level = 3): Returns: pyscf Grid object. """ - grid = dft.gen_grid.Grids(mol) grid.level = grid_level grid.build() - return grid + def sphericalize_density_matrix(mol, dm): """Sphericalize the density matrix in the sense of an integral over all possible rotations. @@ -61,45 +79,27 @@ def sphericalize_density_matrix(mol, dm): Returns: A numpy ndarray with the sphericalized density matrix. 
""" - idx_by_l = [[] for i in range(constants.MAX_L)] - i0 = 0 + i0 = Cursor(action='ranger') for ib in range(mol.nbas): l = mol.bas_angular(ib) + msize = 2*l+1 nc = mol.bas_nctr(ib) - i1 = i0 + nc * (l*2+1) - idx_by_l[l].extend(range(i0, i1, l*2+1)) - i0 = i1 + idx_by_l[l].extend(i0(nc*msize)[::msize]) spherical_dm = np.zeros_like(dm) for l in np.nonzero(idx_by_l)[0]: + msize = 2*l+1 for idx in idx_by_l[l]: for jdx in idx_by_l[l]: if l == 0: spherical_dm[idx,jdx] = dm[idx,jdx] else: trace = 0 - for m in range(2*l+1): + for m in range(msize): trace += dm[idx+m,jdx+m] - for m in range(2*l+1): - spherical_dm[idx+m,jdx+m] = trace / (2*l+1) + for m in range(msize): + spherical_dm[idx+m,jdx+m] = trace / msize return spherical_dm - -def get_converged_mf(mol, func, dm0=None): - """ - - .. todo:: - Write the complete docstring, and merge with get_converged_dm() - """ - - if mol.multiplicity == 1: - mf = dft.RKS(mol) - else: - mf = dft.UKS(mol) - mf.xc = func - mf.kernel(dm0=dm0) - dm = mf.make_rdm1() - return (mf, dm) - diff --git a/qstack/fields/dori.py b/qstack/fields/dori.py index 50c82d5e..e911802f 100644 --- a/qstack/fields/dori.py +++ b/qstack/fields/dori.py @@ -1,33 +1,33 @@ +"""Density Overlap Regions Indicator (DORI) computation.""" + import numpy as np +from tqdm import trange from pyscf.dft.numint import eval_ao, _dot_ao_dm, _contract_rho from pyscf.tools.cubegen import Cube, RESOLUTION, BOX_MARGIN from .dm import make_grid_for_rho -from tqdm import tqdm def eval_rho_dm(mol, ao, dm, deriv=2): - r'''Calculate the electron density and the density derivatives. + """Compute electron density and its derivatives from a density matrix. - Taken from pyscf/dft/numint.py and modified to return second derivative matrices. + Modified from pyscf/dft/numint.py to return full second derivative matrices + needed for DORI calculations. 
Args: - mol : an instance of :class:`pyscf.gto.Mole` - ao : 3D array of shape (*,ngrids,nao): - ao[0] : atomic oribitals values on the grid - ao[1:4] : atomic oribitals derivatives values (if deriv>=1) - ao[4:10] : atomic oribitals second derivatives values (if deriv==2) - dm : 2D array of (nao,nao) - Density matrix (assumed Hermitian) - Kwargs: - deriv : int - Compute with up to `deriv`-order derivatives + mol (pyscf.gto.Mole): pyscf Mole object. + ao (numpy ndarray): 3D array of shape (*, ngrids, nao) where + - ao[0]: atomic orbital values on the grid, + - ao[1:4]: first derivatives (if deriv>=1), + - ao[4:10]: second derivatives in upper triangular form (if deriv==2). + dm (numpy ndarray): Density matrix in AO basis. + deriv (int): Maximum derivative order to compute (0, 1, or 2). Defaults to 2. Returns: - 1D array of size ngrids to store electron density - 2D array of (3,ngrids) to store density derivatives (if deriv>=1) - 3D array of (3,3,ngrids) to store 2nd derivatives (if deriv==2) - ''' - + tuple: Depending on deriv value: + - deriv=0: rho as (ngrids,) numpy ndarray, + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) numpy ndarray, + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) numpy ndarray. 
+ """ AO, dAO_dr, d2AO_dr2 = np.split(ao, [1,4]) DM_AO = _dot_ao_dm(mol, AO[0], dm, None, None, None) rho = _contract_rho(AO[0], DM_AO) @@ -44,7 +44,7 @@ def eval_rho_dm(mol, ao, dm, deriv=2): if deriv==2: DM_dAO_dr_i = 2 * _dot_ao_dm(mol, dAO_dr[i], dm, None, None, None) for j in range(i, 3): - d2rho_dr2[i,j] = _contract_rho(dAO_dr[j], DM_dAO_dr_i) + 2.0*np.einsum('ip,ip->i', d2AO_dr2[triu_idx[(i,j)]], DM_AO) + d2rho_dr2[i,j] = _contract_rho(dAO_dr[j], DM_dAO_dr_i) + 2.0*np.einsum('ip,ip->i', d2AO_dr2[triu_idx[i,j]], DM_AO) d2rho_dr2[j,i] = d2rho_dr2[i,j] if deriv==1: @@ -53,26 +53,22 @@ def eval_rho_dm(mol, ao, dm, deriv=2): def eval_rho_df(ao, c, deriv=2): - r'''Calculate the electron density and the density derivatives - for a fitted density. + """Compute electron density and its derivatives from density-fitting coefficients. Args: - ao : 3D array of shape (*,ngrids,nao): - ao[0] : atomic oribitals values on the grid - ao[1:4] : atomic oribitals derivatives values (if deriv>=1) - ao[4:10] : atomic oribitals second derivatives values (if deriv==2) - c : 1D array of (nao,) - density fitting coefficients - Kwargs: - deriv : int - Compute with up to `deriv`-order derivatives + ao (numpy ndarray): 3D array of shape (*, ngrids, nao) where: + - ao[0]: atomic orbital values on the grid, + - ao[1:4]: first derivatives (if deriv>=1), + - ao[4:10]: second derivatives in upper triangular form (if deriv==2). + c (numpy ndarray): 1D array of density fitting/expansion coefficients. + deriv (int): Maximum derivative order to compute (0, 1, or 2). Defaults to 2. 
Returns: - 1D array of size ngrids to store electron density - 2D array of (3,ngrids) to store density derivatives (if deriv>=1) - 3D array of (3,3,ngrids) to store 2nd derivatives (if deriv==2) - ''' - + tuple: Depending on deriv value: + - deriv=0: rho as (ngrids,) numpy ndarray, + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) numpy ndarray, + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) numpy ndarray. + """ maxdim = 1 if deriv==0 else (4 if deriv==1 else 10) rho_all = np.tensordot(ao[:maxdim], c, 1) # corresponds to np.einsum('xip,p->xi', ao[:maxdim], c) if deriv==0: @@ -87,27 +83,28 @@ def eval_rho_df(ao, c, deriv=2): def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): - r'''Wrapper to calculate the electron density and the density derivatives. + """Calculate electron density and derivatives efficiently. + + Computes density and its spatial derivatives on a grid from either a density + matrix or fitting coefficients, with optimizations for numerical stability. Args: - mol : an instance of :class:`pyscf.gto.Mole` - coords : 2D array of (ngrids,3) - Grid coordinates (in Bohr) - Kwargs: - dm : 2D array of (nao,nao) - Density matrix (assumed Hermitian) (confilicts with c) - c : 1D array of (nao) - density fitting coefficients (confilicts with dm) - deriv : int - Compute with up to `deriv`-order derivatives - eps : float - Min. density to compute the derivatives for + mol (pyscf.gto.Mole): pyscf Mole object. + coords (numpy ndarray): 2D array (ngrids, 3) of grid coordinates in Bohr. + dm (numpy ndarray, optional): 2D density matrix in AO basis. Conflicts with c. + c (numpy ndarray, optional): 1D density fitting coefficients. Conflicts with dm. + deriv (int): Maximum derivative order (0, 1, or 2). Defaults to 2. + eps (float): Minimum density threshold below which derivatives are set to zero. Defaults to 1e-4. 
Returns: - 1D array of size ngrids to store electron density - 2D array of (3,ngrids) to store density derivatives (if deriv>=1) - 3D array of (3,3,ngrids) to store 2nd derivatives (if deriv==2) - ''' + tuple: Depending on deriv value: + - deriv=0: rho as (ngrids,) numpy ndarray, + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) numpy ndarray, + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) numpy ndarray. + + Raises: + RuntimeError: If both or neither of dm and c are provided. + """ if (c is None)==(dm is None): raise RuntimeError('Use either density matrix (dm) or density fitting coefficients (c)') if dm is not None: @@ -135,19 +132,19 @@ def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): def compute_s2rho(rho, d2rho_dr2, eps=1e-4): - """Compute the sign of 2nd eigenvalue of density Hessian × density + """Compute signed density based on second eigenvalue of the density Hessian. + + Useful for distinguishing bonding vs. non-bonding regions. The sign of the + second eigenvalue of the Hessian indicates local density topology. Args: - rho : 1D array of (ngrids) - Electron density - d2rho_dr2 : 3D array of (3,3,ngrids) - Density 2nd derivatives - Kwargs: - eps : float - density threshold + rho (numpy ndarray): 1D array (ngrids,) of electron density values. + d2rho_dr2 (numpy ndarray): 3D array (3, 3, ngrids) of density second derivatives (Hessian). + eps (float): Density threshold below which values are set to zero. Defaults to 1e-4. + Returns: - 1D array of (ngrids) --- electron density * sgn(second eigenvalue of d^2rho/dr^2) - if density>=eps else 0 + numpy ndarray: 1D array (ngrids,) containing rho * sign(λ₂) where λ₂ is the + second eigenvalue of the Hessian, or 0 where rho < eps. 
""" s2rho = np.zeros_like(rho) idx = np.where(rho>=eps) @@ -156,40 +153,17 @@ def compute_s2rho(rho, d2rho_dr2, eps=1e-4): def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): - r""" Inner function to compute DORI analytically + """Compute Density Overlap Regions Indicator (DORI) analytically. Args: - rho : 1D array of (ngrids) - Electron density - drho_dr : 2D array of (3,ngrids) - Density derivatives - d2rho_dr2 : 3D array of (3,3,ngrids) - Density 2nd derivatives - Kwargs: - eps : float - Density threshold (if |rho|=eps)[0] k = drho_dr[...,idx] / rho[idx] k2 = np.einsum('xi,xi->i', k, k) @@ -207,28 +181,24 @@ def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): def compute_dori_num(mol, coords, dm=None, c=None, eps=1e-4, dx=1e-4): - r""" Inner function to compute DORI seminumerically - See documentation to compute_dori(). + """Compute DORI using numerical differentiation (semi-numerical approach). + + Alternative to analytical DORI calculation using finite differences for + derivatives of k², where k=dρ/dr. Useful for validation or when analytical + gradients are problematic. Args: - mol : an instance of :class:`pyscf.gto.Mole` - coords : 2D array of (ngrids,3) - Grid coordinates (in Bohr) - Kwargs: - dm : 2D array of (nao,nao) - Density matrix (assumed Hermitian) (confilicts with c) - c : 1D array of (nao) - density fitting coefficients (confilicts with dm) - eps : float - Density threshold (if |rho|=eps else 0 (only with alg='analytical'). 
+ tuple: (dori, rho, s2rho) where: + - dori (numpy ndarray): 1D array (ngrids,) of DORI values + - rho (numpy ndarray): 1D array (ngrids,) of electron density + - s2rho (numpy ndarray): 1D array (ngrids,) of signed density (None if numerical) """ - max_size = int(mem * 2**30) # mem * 1 GiB point_size = 10 * mol.nao * np.float64().itemsize # memory needed for 1 grid point dgrid = max_size // point_size - grid_chunks = range(0, len(coords), dgrid) - if progress: - grid_chunks = tqdm(grid_chunks) + grid_chunks = trange(0, len(coords), dgrid, disable=not progress) rho = np.zeros(len(coords)) @@ -325,53 +285,55 @@ def dori(mol, dm=None, c=None, nx=80, ny=80, nz=80, resolution=RESOLUTION, margin=BOX_MARGIN, cubename=None, dx=1e-4, mem=1, progress=False): - """Compute DORI + """High-level interface to compute DORI with automatic grid generation and file output. + + Reference: + P. de Silva, C. Corminboeuf, + "Simultaneous visualization of covalent and noncovalent interactions using regions of density overlap", + J. Chem. Theory Comput. 10, 3745–3756 (2014), doi:10.1021/ct500490b. + + Computes the Density Overlap Regions Indicator (DORI). + Automatically generates appropriate grids and optionally saves results + to cube files for visualization. + + DORI is a density-based descriptor for identifying covalent bonding regions, + with values close to 1 indicating strong electron sharing (covalent bonds). + + DORI(r) = γ(r) = θ(r) / (1 + θ(r)), where: + θ = |∇(k²)|² / |k|⁶, and k(r) = ∇ρ(r) / ρ(r) Args: - mol : an instance of :class:`pyscf.gto.Mole` - Kwargs: - dm : 2D array of (nao,nao) - Density matrix (confilicts with c) - c : 1D array of (nao) - Density fitting coefficients (confilicts with dm) - eps : float - density threshold for DORI - alg : str - [a]nalytical or [n]umerical computation - grid_type : str - Type of grid, 'dft' for a DFT grid and 'cube' for a cubic grid. - grid_level : int - For a DFT grid, the grid level. 
- nx, ny, nz : int - For a cubic grid, - the number of grid point divisions in x, y, z directions. - Conflicts to keyword resolution. - resolution: float - For a cubic grid, - the resolution of the mesh grid in the cube box. - Conflicts to keywords nx, ny, nz. - cubename : str - For a cubic grid, - name for the cube files to save the results to. - mem : float - max. memory (GiB) that can be allocated to compute - the AO and their derivatives - dx : float - Step (in Bohr) to take the numerical derivatives - progress : bool - if print a progress bar + mol (pyscf.gto.Mole): pyscf Mole object. + dm (numpy ndarray, optional): 2D density matrix in AO basis. Conflicts with c. + c (numpy ndarray, optional): 1D density fitting coefficients. Conflicts with dm. + eps (float): Density threshold for DORI. Defaults to 1e-4. + alg (str): Algorithm: 'analytical' or 'numerical'. Defaults to 'analytical'. + grid_type (str): Grid type: 'dft' for DFT quadrature grid or 'cube' for uniform grid. Defaults to 'dft'. + grid_level (int): For DFT grid, the grid level (higher = more points). Defaults to 1. + nx (int): For cube grid, number of points in x direction. Defaults to 80. + ny (int): For cube grid, number of points in y direction. Defaults to 80. + nz (int): For cube grid, number of points in z direction. Defaults to 80. + resolution (float): For cube grid, grid spacing in Bohr. Conflicts with nx/ny/nz. + margin (float): For cube grid, extra space around molecule in Bohr. Defaults to BOX_MARGIN. + cubename (str, optional): For cube grid, base filename for output cube files. If None, no files saved. + dx (float): For numerical algorithm, finite difference step in Bohr. Defaults to 1e-4. + mem (float): Maximum memory in GiB for AO evaluation. Defaults to 1. + progress (bool): If True, displays progress bar. Defaults to False. 
Returns: - Tuple of: - 1D array of (ngrids) --- computed DORI - 1D array of (ngrids) --- electron density - 1D array of (ngrids) --- electron density * sgn(second eigenvalue of d^2rho/dr^2) - if density>=eps else 0 (only with alg='analytical'). - 2D array of (ngrids,3) --- grid coordinates - 1D array of (ngrids) --- grid weights - + tuple: (dori, rho, s2rho, coords, weights) containing: + - dori (numpy ndarray): 1D array of DORI values + - rho (numpy ndarray): 1D array of electron density + - s2rho (numpy ndarray): 1D array of signed density (None if numerical) + - coords (numpy ndarray): 2D array (ngrids, 3) of grid coordinates + - weights (numpy ndarray): 1D array of grid weights + + Note: + When cubename is provided with cube grid, creates three files: + - .dori.cube: DORI values + - .rho.cube: electron density + - .sgnL2rho.cube: signed density (analytical only) """ - if grid_type=='dft': grid = make_grid_for_rho(mol, grid_level=grid_level) weights = grid.weights diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index 743c51fe..868a323e 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -1,11 +1,18 @@ +"""Excited state density and property analysis.""" + import numpy as np from . import moments + def get_cis(mf, nstates): - """ + """Run a CIS (Configuration interaction singles) / TDA (Tamm-Dancoff approximation) computation. - .. todo:: - Write the complete docstring. + Args: + mf: Pyscf mean-field object. + nstates (int): Number of excited states to compute. + + Returns: + TDA object: Converged TDA/CIS computation object with excited state information. """ td = mf.TDA() td.nstates = nstates @@ -14,26 +21,30 @@ def get_cis(mf, nstates): td.analyze() return td + def get_cis_tdm(td): - """ + """Extract transition density matrices from TDA/CIS calculation. + + Args: + td: TDA/CIS calculation object containing excitation amplitudes. - .. todo:: - Write the complete docstring. 
+ Returns: + numpy ndarray: Array of transition density matrices for all computed states. """ - return np.sqrt(2.0) * np.array([ xy[0] for xy in td.xy ]) + return np.sqrt(2.0) * np.array([xy[0] for xy in td.xy]) + def get_holepart(mol, x, coeff): - """Computes the hole and particle density matrices (atomic orbital basis) of selected states. + """Compute the hole and particle density matrices (in AO basis) for a selected state. Args: mol (pyscf Mole): pyscf Mole object. - x (numpy ndarray): Response vector (nstates×occ×virt) normalized to 1. + x (numpy ndarray): Response vector (occ×virt) normalized to 1. coeff (numpy ndarray): Ground-state molecular orbital vectors. Returns: Two numpy ndarrays containing the hole density matrices and the particle density matrices respectively. """ - def mo2ao(mat, coeff): return np.dot(coeff, np.dot(mat, coeff.T)) occ = mol.nelectron//2 @@ -43,45 +54,48 @@ def mo2ao(mat, coeff): part_ao = mo2ao(part_mo, coeff[:,occ:]) return hole_ao, part_ao + def get_transition_dm(mol, x_mo, coeff): - """ Compute the Transition Density Matrix. + """Compute the transition density matrix for a selected state. Args: mol (pyscf Mole): pyscf Mole object. - x_mo (numpy ndarray): Response vector (nstates×occ×virt) normalized to 1. + x_mo (numpy ndarray): Response vector (occ×virt) normalized to 1. coeff (numpy ndarray): Ground-state molecular orbital vectors. Returns: - A numpy ndarray containing the Transition Density Matrix. + numpy ndarray: transition density matrix. """ - occ = mol.nelectron//2 x_ao = coeff[:,:occ] @ x_mo @ coeff[:,occ:].T return x_ao def exciton_properties_c(mol, hole, part): - """ Computes the decomposed/predicted hole-particle distance, the hole size and the particle size. + """Compute the decomposed/predicted hole-particle distance, the hole size, and the particle size. Args: mol (pyscf Mole): pyscf Mole object. - hole (numpy ndarray): Hole density matrix. - part (numpy ndarray): Particle density matrix. 
+ hole (numpy ndarray): Hole AO density decomposition coefficients. + part (numpy ndarray): Particle density decomposition coefficients. Returns: - Three floats: the hole-particle distance, the hole size, and the particle size respectively. + Tuple of floats: + - hole-particle distance, + - hole size, + - particle size. """ - - _hole_N, hole_r, hole_r2 = moments.r2_c(hole, mol) - _part_N, part_r, part_r2 = moments.r2_c(part, mol) + _hole_N, hole_r, hole_r2 = moments.r2_c(mol, hole) + _part_N, part_r, part_r2 = moments.r2_c(mol, part) dist = np.linalg.norm(hole_r-part_r) hole_extent = np.sqrt(hole_r2-hole_r@hole_r) part_extent = np.sqrt(part_r2-part_r@part_r) - return(dist, hole_extent, part_extent) + return dist, hole_extent, part_extent + def exciton_properties_dm(mol, hole, part): - """Computes the ab initio hole-particle distance, the hole size and the particle size. + """Compute the ab initio hole-particle distance, the hole size, and the particle size. Args: mol (pyscf Mole): pyscf Mole object. @@ -89,9 +103,11 @@ def exciton_properties_dm(mol, hole, part): part (numpy ndarray): Particle density matrix. Returns: - Three floats: the hole-particle distance, the hole size, and the particle size respectively. + Tuple of floats: + - hole-particle distance, + - hole size, + - particle size. """ - with mol.with_common_orig((0,0,0)): ao_r = mol.intor_symmetric('int1e_r', comp=3) ao_r2 = mol.intor_symmetric('int1e_r2') @@ -104,20 +120,28 @@ def exciton_properties_dm(mol, hole, part): dist = np.linalg.norm(hole_r-part_r) hole_extent = np.sqrt(hole_r2-hole_r@hole_r) part_extent = np.sqrt(part_r2-part_r@part_r) - return(dist, hole_extent, part_extent) + return dist, hole_extent, part_extent + def exciton_properties(mol, hole, part): - """Computes the ab initio or decomposed/predicted hole-particle distance, the hole size and the particle size according to the number of dimensions of the density matrices.
+ """Compute the ab initio or decomposed/predicted hole-particle distance, the hole size, and the particle size. + + Distance is defined as |⟨r⟩_hole - ⟨r⟩_part|, and size as sqrt(⟨r^2⟩ - ⟨r⟩^2). Args: mol (pyscf Mole): pyscf Mole object. - hole (numpy ndarray): Hole density matrix. - part (numpy ndarray): Particle density matrix. + hole (numpy ndarray): Hole density matrix in AO basis (2D) or decomposition coefficients (1D). + part (numpy ndarray): Particle density matrix in AO basis (2D) or decomposition coefficients (1D). Returns: - The hole-particle distance, the hole size, and the particle size as floats. - """ + Tuple of floats: + - hole-particle distance, + - hole size, + - particle size. + Raises: + RuntimeError: If the dimensions of hole and part do not match or are not 1D or 2D. + """ if hole.ndim==1 and part.ndim==1: return exciton_properties_c(mol, hole, part) elif hole.ndim==2 and part.ndim==2: diff --git a/qstack/fields/hf_otpd.py b/qstack/fields/hf_otpd.py index fcaaa95d..850cfb79 100644 --- a/qstack/fields/hf_otpd.py +++ b/qstack/fields/hf_otpd.py @@ -1,21 +1,27 @@ +"""Hartree-Fock on-top pair density.""" + from pyscf import dft import numpy as np from .dm import make_grid_for_rho -def hf_otpd(mol, dm, grid_level = 3, save_otpd = False, return_all = False): - """Computes the uncorrelated on-top pair density on a grid. + +def hf_otpd(mol, dm, grid_level=3, save_otpd=False, return_all=False): + """Compute the Hartree-Fock uncorrelated on-top pair density (OTPD) on a grid. + + The on-top pair density is the probability density of finding two electrons + at the same position. For Hartree-Fock, this is computed as (rho/2)^2. Args: mol (pyscf Mole): pyscf Mole object. - dm (numpy ndarray): Density matrix in AO-basis. - grid_level (int): Controls the number of radial and angular points. - save_otpd (bool): If True, saves the input and output in a .npz file.
Defaults to False - return_all (bool): If true, returns the uncorrelated on-top pair density on a grid, and the cooresponding pyscf Grid object; if False, returns only the uncorrelated on-top pair density. Defaults to False + dm (numpy ndarray): 2D density matrix in AO-basis. + grid_level (int): DFT grid level controlling number of radial and angular points. Defaults to 3. + save_otpd (bool): If True, saves results to a .npz file. Defaults to False. + return_all (bool): If True, returns both OTPD and grid object; if False, returns only OTPD. Defaults to False. Returns: - A numpy ndarray with the uncorrelated on-top pair density on a grid. If 'return_all' = True, then it also returns the corresponding pyscf Grid object. + numpy ndarray or tuple: If return_all is False, returns 1D array of OTPD values. + If return_all is True, returns tuple of (otpd, grid) where grid is the pyscf Grid object. """ - grid = make_grid_for_rho(mol, grid_level) ao = dft.numint.eval_ao(mol, grid.coords) @@ -28,17 +34,22 @@ def hf_otpd(mol, dm, grid_level = 3, save_otpd = False, return_all = False): if return_all: return hf_otpd, grid - return hf_otpd + def save_OTPD(mol, otpd, grid): - """ Saves the information about an OTPD computation into a .npz file. + """Save on-top pair density computation results to a NumPy .npz archive. + + Creates a .npz file containing the molecular structure, OTPD values, + grid coordinates, and integration weights for later analysis. Args: mol (pyscf Mole): pyscf Mole object. - otpd (numpy ndarray): On-top pair density on a grid. - grid (pyscf Grid): Grid object - """ + otpd (numpy ndarray): 1D array of on-top pair density values on the grid. + grid (pyscf Grid): Grid object containing coordinates and weights. + Output: + Creates a file named {elements}_otpd_data.npz on disk, where {elements} is the concatenation of mol.elements.
+ """ output = ''.join(mol.elements)+"_otpd_data" np.savez(output, atom=mol.atom, rho=otpd, coords=grid.coords, weights=grid.weights) diff --git a/qstack/fields/hirshfeld.py b/qstack/fields/hirshfeld.py index 5819cdd8..81038c65 100644 --- a/qstack/fields/hirshfeld.py +++ b/qstack/fields/hirshfeld.py @@ -1,45 +1,53 @@ +"""Hirshfeld partitioning and atomic charge analysis.""" + import numpy as np import pyscf from .dm import make_grid_for_rho def spherical_atoms(elements, atm_bas): - """Get density matrices for spherical atoms. + """Compute density matrices for spherically averaged isolated atoms. + + For each element, creates an isolated atom calculation with appropriate spin + and computes its density matrix using atomic Hartree-Fock initial guess. Args: - elements (list of str): Elements to compute the DM for. - atm_bas (string / pyscf basis dictionary): Basis to use. + elements (list of str or set): Element symbols to compute density matrices for. + atm_bas (str or dict): Basis set name (e.g., 'def2-svp') or pyscf basis dictionary. Returns: - A dict of numpy 2d ndarrays which contains the atomic density matrices for each element with its name as a key. + dict: Dictionary mapping element symbols (str) to atomic density matrices (numpy 2D ndarrays). """ - dm_atoms = {} for q in elements: mol_atm = pyscf.gto.M(atom=[[q, [0,0,0]]], spin=pyscf.data.elements.ELEMENTS_PROTON[q]%2, basis=atm_bas) dm_atoms[q] = pyscf.scf.hf.init_guess_by_atom(mol_atm) return dm_atoms -def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): - """ Computes the Hirshfeld weights. + +def _hirshfeld_weights(mol, grid_coord, atm_dm, atm_bas, dominant): + """Compute Hirshfeld partitioning weights for each atom at grid points. + + Hirshfeld partitioning divides the molecular density among atoms based on + their promolecular (free atom) densities. Dominant partitioning assigns + each grid point exclusively to the atom with the highest weight. 
Args: mol (pyscf Mole): pyscf Mole object. - grid_coord (numpy ndarray): Coordinates of the grid. - dm_atoms (dict of numpy 2d ndarrays): Atomic density matrices (output of the `spherical_atoms` fn). - atm_bas (string / pyscf basis dictionary): Basis set used to compute dm_atoms. - dominant (bool): Whether to use dominant or classical partitioning. + grid_coord (numpy ndarray): 2D array (ngrids, 3) of grid point coordinates in Bohr. + atm_dm (dict): Dictionary mapping element symbols to atomic density matrices from `spherical_atoms`. + atm_bas (str or dict): Basis set name or dictionary used for atomic density matrices. + dominant (bool): If True, uses dominant (all-or-nothing) partitioning; if False, uses standard Hirshfeld weights. Returns: - A numpy ndarray containing the computed Hirshfeld weights. + numpy ndarray: 2D array (natm, ngrids) of partitioning weights for each atom at each grid point. """ - # promolecular density grid_n = len(grid_coord) - rho_atm = np.zeros((mol_full.natm, grid_n), dtype=float) - for i in range(mol_full.natm): - q = mol_full._atom[i][0] - mol_atm = pyscf.gto.M(atom=mol_full._atom[i:i+1], basis=atm_bas, spin=pyscf.data.elements.ELEMENTS_PROTON[q]%2, unit='Bohr') + rho_atm = np.zeros((mol.natm, grid_n), dtype=float) + for i in range(mol.natm): + q = mol._atom[i][0] + mol_atm = pyscf.gto.M(atom=mol._atom[i:i+1], basis=atm_bas, spin=pyscf.data.elements.ELEMENTS_PROTON[q]%2, unit='Bohr') ao_atm = pyscf.dft.numint.eval_ao(mol_atm, grid_coord) rho_atm[i] = pyscf.dft.numint.eval_rho(mol_atm, ao_atm, atm_dm[q]) @@ -47,14 +55,13 @@ def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): rho = rho_atm.sum(axis=0) idx = np.where(rho > 0)[0] h_weights = np.zeros_like(rho_atm) - for i in range(mol_full.natm): + for i in range(mol.natm): h_weights[i,idx] = rho_atm[i,idx] /rho[idx] if dominant: - # get dominant hirshfeld weights for point in range(grid_n): i = np.argmax(h_weights[:,point]) - h_weights[:,point] = 
np.zeros(mol_full.natm) + h_weights[:,point] = np.zeros(mol.natm) h_weights[i,point] = 1.0 return h_weights @@ -62,23 +69,33 @@ def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): def hirshfeld_charges(mol, cd, dm_atoms=None, atm_bas=None, dominant=True, occupations=False, grid_level=3): - """Fit molecular density onto an atom-centered basis. + """Compute atomic charges or occupations using Hirshfeld partitioning. + + Partitions the molecular electron density among atoms using Hirshfeld weights + based on free atom densities. Can work with either density-fitting coefficients + or density matrices, and supports both standard and dominant partitioning. Args: mol (pyscf Mole): pyscf Mole object. - cd (1D or 2D numpy ndarray or list of arrays): Density-fitting coefficients / density matrices. - dm_atoms (dict of numpy 2d ndarrays): Atomic density matrices (output of the `spherical_atoms` fn). - If None, is computed on-the-fly. - atm_bas (string / pyscf basis dictionary): Basis set used to compute dm_atoms. - If None, is taken from mol. - dominant (bool): Whether to use dominant or classical partitioning. - occupations (bool): Whether to return atomic occupations or charges. - grid level (int): Grid level for numerical integration. + cd (numpy ndarray or list): Density representation as: + - 1D array: Density-fitting coefficients + - 2D array: Density matrix in AO basis + - list: Multiple densities (returns list of results). + dm_atoms (dict, optional): Pre-computed atomic density matrices from `spherical_atoms`. + If None, computed automatically. Defaults to None. + atm_bas (str or dict, optional): Basis set for atomic density matrices. + If None, uses mol.basis. Defaults to None. + dominant (bool): If True, uses dominant (all-or-nothing) partitioning; + if False, uses standard Hirshfeld weights. Defaults to True. + occupations (bool): If True, returns atomic electron populations; + if False, returns atomic charges (Z - N). Defaults to False. 
+ grid_level (int): DFT grid level for numerical integration. Defaults to 3. Returns: - A numpy 1d ndarray or list of them containing the computed atomic charges or occupations. + numpy ndarray or list: Atomic charges or occupations. + - Single 1D array if cd is a single density. + - List of 1D arrays if cd is a list of densities. """ - def atom_contributions(cd, ao, tot_weights): if cd.ndim==1: tmp = np.einsum('i,xi->x', cd, ao) @@ -87,10 +104,7 @@ def atom_contributions(cd, ao, tot_weights): return np.einsum('x,ax->a', tmp, tot_weights) # check input - if type(cd) is list: - cd_list = cd - else: - cd_list = [cd] + cd_list = cd if type(cd) is list else [cd] # spherical atoms if atm_bas is None: @@ -111,7 +125,4 @@ def atom_contributions(cd, ao, tot_weights): if not occupations: charges_list = [mol.atom_charges()-charges for charges in charges_list] - if type(cd) is list: - return charges_list - else: - return charges_list[0] + return charges_list if type(cd) is list else charges_list[0] diff --git a/qstack/fields/moments.py b/qstack/fields/moments.py index 9d793b6d..f784d6a2 100644 --- a/qstack/fields/moments.py +++ b/qstack/fields/moments.py @@ -1,106 +1,171 @@ +"""Multipole moment computation functions.""" + import numpy as np +from qstack.compound import basis_flatten +from qstack.mathutils.array import safe_divide, scatter + def first(mol, rho): - """ Computes the transition dipole moments. + r"""Compute the first moment of a molecular density needed for dipole moments. + + $$\int r \rho(r) dr$$ Args: mol (pyscf Mole): pyscf Mole object. - rho (numpy ndarray): Density Matrix (trnasition if given ) or fitting coefficnts for the same matrix. + rho (numpy ndarray): 2D (mol.nao×mol.nao) density matrix or 1D (mol.nao) fitting coefficients. Returns: - A numpy ndarray with the transition dipole moments. If rho is a 1D matrix, returns the Decomposed/predicted transition dipole moments; if rho is a 2D matrix, returns the ab initio transition dipole moments. 
+ numpy ndarray: Electronic dipole moment vector (3 components). + + Raises: + RuntimeError: If `rho` is not 1D or 2D. """ if rho.ndim==1: - return r_c(mol, rho) #coefficient + return r2_c(mol, rho, moments=(1,))[0] elif rho.ndim==2: - return r_dm(mol, rho) #matrix + return r_dm(mol, rho) else: - raise RuntimeError('Dimension mismatch') + raise RuntimeError(f'Dimension mismatch {rho.shape}') def r_dm(mol, dm): - """ + """Compute the first moment of a density matrix. + + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): 2D density matrix in AO basis. - .. todo:: - write docstring. + Returns: + numpy ndarray: Electronic dipole moment vector (3 components). """ with mol.with_common_orig((0,0,0)): ao_dip = mol.intor_symmetric('int1e_r', comp=3) el_dip = np.einsum('xij,ji->x', ao_dip, dm) return el_dip -def r_c(mol, rho): - """ - .. todo:: - Write docstring, and include uncontracted basis in code and verify formulas - """ - r = np.zeros(3) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - coord = mol.atom_coords()[iat] - for gto in mol._basis[q]: - l, [a, c] = gto - if(l==0): - I0 = c * (2.0*np.pi/a)**0.75 - r += I0 * rho[i] * coord - i+=1 - elif(l==1): - I1 = c * (2.0*np.pi)**0.75 / (a**1.25) - r += I1 * rho[i:i+3] - i+=3 - else: - i+=2*l+1 - return r -def r2_c(rho, mol): - """Compute the zeroth ( :math:`<1>` ), first ( :math:`` ), and second ( :math:``) moments of electron density distribution. +def r2_c(mol, rho, moments=(0,1,2), per_atom=False): + r"""Compute the zeroth ( :math:`\langle 1 \rangle` ), first ( :math:`\langle r \rangle` ), and second ( :math:`\langle r^2 \rangle`) moments of a fitted density. .. math:: - <1> = \\int \\rho d r - \\quad + \langle 1 \rangle = \int \rho d r + \quad ; - \\quad - = \\int \\hat{r} \\rho d r - \\quad + \quad + \langle r \rangle = \int \hat{r} \rho d r + \quad ; - \\quad - = \\int \\hat{r}^{2} \\rho d r + \quad + \langle r^2 \rangle = \int \hat{r}^{2} \rho d r Args: - mol (scipy Mole): scipy Mole object. + mol (pyscf Mole): pyscf Mole object.
+ rho (numpy ndarray): 1D array of density-fitting coefficients. Can be None to compute AO integrals instead. + moments (tuple): Moments to compute (0, 1, and/or 2). + per_atom (bool): Whether to return AO integrals / moments per atom. Returns: - The zeroth, first, and second moments of electron density distribution. + tuple: If rho is not None, values representing the requested moments, possibly containing: + - float: Zeroth moment (integrated density). + - numpy ndarray: First moment (3-component dipole vector). + - float: Second moment (mean square radius). + If rho is None, arrays representing the requested moments in AO basis so that + they can be contracted with the coefficients using (returned array)@(rho). + + if rho is None and per_atom is True: + 0th moment: (mol.nao, mol.natm) + 1st moment: (3, mol.nao, mol.natm) + 2nd moment: (mol.nao, mol.natm) + + if rho is None and per_atom is False: + 0th moment: (mol.nao,) + 1st moment: (3, mol.nao) + 2nd moment: (mol.nao,) - .. todo:: - Include uncontracted basis in code and verify formulas + if rho is not None and per_atom is True: + 0th moment: (mol.natm,) + 1st moment: (3, mol.natm) + 2nd moment: (mol.natm,) + + Raises: + NotImplementedError: If a moment > 2 is requested.
""" + if max(moments)>2: + raise NotImplementedError('Only moments 0, 1, and 2 are supported.') + ret = {} + + (iat, l, _), (a, c) = basis_flatten(mol) + coords = mol.atom_coords()[iat] + + idx_l0 = np.where(l==0)[0] + ta = safe_divide(2.0*np.pi, a[idx_l0])**0.75 + I0 = (c[idx_l0] * ta).sum(axis=1) + if rho is None: + if 0 in moments: + moments_ao = np.zeros(mol.nao) + moments_ao[idx_l0] = I0 + if per_atom: + ret[0] = scatter(moments_ao, iat) + else: + ret[0] = moments_ao + + else: + t0 = rho[idx_l0] * I0 + if 0 in moments: + if per_atom: + ret[0] = np.zeros(mol.natm) + np.add.at(ret[0], iat[idx_l0], t0) + else: + ret[0] = t0.sum() - N = 0.0 # <1> zeroth - r = np.zeros(3) # first - r2 = 0.0 # second moments electron density distribution - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - coord = mol.atom_coords()[iat] - for gto in mol._basis[q]: - l, [a, c] = gto - if(l==0): - I0 = c * (2.0*np.pi/a)**0.75 - I2 = c * 3.0 * (np.pi**0.75) / (a**1.75 * 2.0**0.25) - N += I0 * rho[i] - r += I0 * rho[i] * coord - r2 += I0 * rho[i] * (coord@coord) - r2 += I2 * rho[i] - i+=1 - elif(l==1): - I1 = c * (2.0*np.pi)**0.75 / (a**1.25) - temp = I1 * rho[i:i+3] - r += temp - r2 += 2.0*(temp@coord) - i+=3 + if 1 in moments or 2 in moments: + idx_l1 = np.where(l==1)[0] + I1 = (c[idx_l1] * safe_divide((2.0*np.pi)**0.75, a[idx_l1]**1.25)).sum(axis=1) + mask = np.tile([[1,0,0,0,1,0,0,0,1]], len(I1)//3).reshape(-1,3).T + I1 = I1*mask + if rho is not None: + t1 = (rho[idx_l1]*I1).T + + if 1 in moments: + if rho is None: + moments_ao = np.zeros((3, mol.nao)) + moments_ao[:,idx_l0] = I0 * coords[idx_l0].T + moments_ao[:,idx_l1] = I1 + moments_ao = moments_ao + if per_atom: + ret[1] = scatter(moments_ao, iat) + else: + ret[1] = moments_ao + else: + if per_atom: + ret[1] = np.zeros((3, mol.natm)) + np.add.at(ret[1], iat[idx_l0], coords[idx_l0] * t0[:,None]) + np.add.at(ret[1], iat[idx_l1], t1) + else: + ret[1] = (t0 * coords[idx_l0].T).sum(axis=1) \ + + t1.sum(axis=0) + + if 2 in 
moments: + I2 = (c[idx_l0] * ta * safe_divide(1.5, a[idx_l0])).sum(axis=1) + if rho is None: + moments_ao = np.zeros(mol.nao) + moments_ao[idx_l0] = I2 + (I0 * (coords[idx_l0]**2).sum(axis=1)) + moments_ao[idx_l1] = 2.0 * (I1.T * coords[idx_l1]).sum(axis=1) + if per_atom: + ret[2] = scatter(moments_ao, iat) else: - i+=2*l+1 - return N, r, r2 + ret[2] = moments_ao + + else: + if per_atom: + ret[2] = np.zeros(mol.natm) + np.add.at(ret[2], iat[idx_l0], t0 * (coords[idx_l0]**2).sum(axis=1)) + np.add.at(ret[2], iat[idx_l0], rho[idx_l0] * I2) + np.add.at(ret[2], iat[idx_l1], 2.0 * (t1 * coords[idx_l1]).sum(axis=1)) + else: + ret[2] = t0 @ (coords[idx_l0]**2).sum(axis=1) \ + + rho[idx_l0] @ I2 \ + + 2.0 * (t1 * coords[idx_l1]).sum() + + return tuple(ret[i] for i in moments) diff --git a/qstack/mathutils/__init__.py b/qstack/mathutils/__init__.py index dcd5d900..b38ee9ff 100644 --- a/qstack/mathutils/__init__.py +++ b/qstack/mathutils/__init__.py @@ -1,3 +1,3 @@ +"""Mathematical utilities module.""" + from . import fps, matrix -# from . import xyz_integrals_float # hidden by default to make the next part more discoverable -# from . import wigner, xyz_integrals_sym # needs sympy diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py new file mode 100644 index 00000000..9647126e --- /dev/null +++ b/qstack/mathutils/array.py @@ -0,0 +1,96 @@ +"""Array manipulation utility functions.""" + +import numpy as np +from qstack.tools import slice_generator + + +def scatter(values, indices): + """Scatter values into a new array based on provided indices. + + Does the same as + ``` + for i, j in enumerate(indices): + x[...,i,j] = values[...,i] + ``` + + Args: + values (numpy.ndarray): Array of values to be scattered of shape (..., N). + indices (numpy.ndarray): Array of indices indicating where to scatter the values of shape (N,). + + Returns: + numpy.ndarray: New array with scattered values of shape (..., N, max(indices)+1). 
+ + + """ + x = np.zeros((*values.shape, max(indices)+1)) + x[...,np.arange(len(indices)),indices] = values + return x + + +def safe_divide(a, b): + """Divide numpy arrays avoiding division by zero. + + Args: + a (numpy.ndarray): Numerator array. + b (numpy.ndarray): Denominator array. + + Returns: + numpy.ndarray: Result of element-wise division of a by b, with zeros where b is zero. + """ + return np.divide(a, b, out=np.zeros_like(b), where=b!=0) + + +def stack_padding(xs): + """Stack arrays with different shapes along a new axis by padding smaller arrays with zeros. + + Analogous to numpy.stack(axis=0). + + Args: + xs (list): List of numpy arrays to be stacked. + + Returns: + numpy.ndarray : A stacked array with shape (len(xs), *max_shape). + + Raises: + ValueError: If input arrays have different number of dimensions. + """ + xs = [np.asarray(x) for x in xs] + if len({x.ndim for x in xs}) > 1: + raise ValueError("All input arrays must have the same number of dimensions.") + shapes = [x.shape for x in xs] + max_size = max(shapes) + if max_size == min(shapes): + return np.stack(xs, axis=0) + X = np.zeros((len(xs), *max_size)) + for i, x in enumerate(xs): + slices = tuple(np.s_[0:s] for s in x.shape) + X[i][slices] = x + return X + + +def vstack_padding(xs): + """Vertically stack arrays with different shapes by padding smaller arrays with zeros. + + 1D input arrays of shape (N,) are reshaped to (1,N). + Analogous to numpy.vstack. + + Args: + xs (list): List of numpy arrays to be stacked. + + Returns: + numpy.ndarray : A stacked array with shape (sum(x.shape[0], *max_shape[1:]). + + Raises: + ValueError: If input arrays have different number of dimensions. 
+ """ + xs = [np.atleast_2d(np.asarray(x)) for x in xs] + if len({x.ndim for x in xs}) > 1: + raise ValueError("All input arrays must have the same number of dimensions.") + shapes_common_axis, shapes_other_axes = np.split(np.array([x.shape for x in xs]), (1,), axis=1) + if len(np.unique(shapes_other_axes, axis=0))==1: + return np.vstack(xs) + X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0))) + for x, s0 in slice_generator(xs, inc=lambda x: x.shape[0]): + slices = (s0, *(np.s_[0:s] for s in x.shape[1:])) + X[slices] = x + return X diff --git a/qstack/mathutils/fps.py b/qstack/mathutils/fps.py index ec335be0..7c9a16b0 100644 --- a/qstack/mathutils/fps.py +++ b/qstack/mathutils/fps.py @@ -1,7 +1,35 @@ +"""Farthest Point Sampling algorithm implementation.""" + import numpy as np + def do_fps(x, d=0): - # Code from Giulio Imbalzano + """Perform Farthest Point Sampling on a set of points. + + References: + P. O. Dral, A. Owens, S. N. Yurchenko, W. Thiel, + "Structure-based sampling and self-correcting machine learning for accurate calculations of potential energy surfaces and vibrational levels", + J. Chem. Phys. 146 244108 (2017), doi:10.1063/1.4989536. + + G. Imbalzano, A. Anelli, D. Giofré, S. Klees, J. Behler, M. Ceriotti + "Automatic selection of atomic fingerprints and reference configurations for machine-learning potentials", + J. Chem. Phys. 148 241730 (2018), doi:10.1063/1.5024611. + + K. Rossi, V. Jurásková, R. Wischert, L. Garel, C. Corminboeuf, M. Ceriotti + "Simulating solvation and acidity in complex mixtures with first-principles accuracy: the case of CH3SO3H and H2O2 in phenol", + J. Chem. Theory Comput. 16 5139–5149 (2020), doi:10.1021/acs.jctc.0c00362. + + Code from Giulio Imbalzano. + + Args: + x (numpy.ndarray): 2D array of points, shape (n_points, n_features). + d (int): Number of points to sample. If 0, samples all points. Defaults to 0. 
+ + Returns: + tuple: A tuple containing: + - iy (numpy.ndarray): Indices of sampled points. + - measure (numpy.ndarray): Distances to nearest selected point for each iteration. + """ n = len(x) if d==0: d = n diff --git a/qstack/mathutils/matrix.py b/qstack/mathutils/matrix.py index d8bca2ed..5bf60f0d 100644 --- a/qstack/mathutils/matrix.py +++ b/qstack/mathutils/matrix.py @@ -1,13 +1,16 @@ +"""Matrix operation utility functions.""" + import numpy as np + def from_tril(mat_tril): """Restore a symmetric matrix from its lower-triangular form. Args: - mat_tril (numpy 1darray): matrix in a lower-triangular form. + mat_tril (numpy.ndarray): 1D array containing matrix in lower-triangular form. Returns: - A numpy 2darray containing the matrix. + numpy.ndarray: 2D symmetric matrix. """ n = int((np.sqrt(1+8*len(mat_tril))-1)/2) ind = np.tril_indices(n) @@ -16,7 +19,17 @@ def from_tril(mat_tril): mat = mat + mat.T - np.diag(np.diag(mat)) return mat + def sqrtm(m, eps=1e-13): + """Compute the matrix square root of a symmetric matrix. + + Args: + m (numpy.ndarray): 2D symmetric matrix. + eps (float): Threshold for eigenvalues to be considered zero. Defaults to 1e-13. + + Returns: + numpy.ndarray: 2D symmetric matrix, the square root of the input matrix. + """ e, b = np.linalg.eigh(m) e[abs(e) < eps] = 0.0 sm = b @ np.diag(np.sqrt(e)) @ b.T diff --git a/qstack/mathutils/rotation_matrix.py b/qstack/mathutils/rotation_matrix.py new file mode 100644 index 00000000..c4cf40b5 --- /dev/null +++ b/qstack/mathutils/rotation_matrix.py @@ -0,0 +1,76 @@ +"""Rotation matrix generation functions.""" + +import numpy as np + + +def _Rz(a): + """Compute the rotation matrix around laboratory z-axis. + + Args: + a (float): Rotation angle in radians. + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + ca, sa = np.cos(a), np.sin(a) + return np.array([ + [ca, -sa, 0], + [sa, ca, 0], + [0, 0, 1], + ]) + + +def _Ry(b): + """Compute the rotation matrix around laboratory y-axis. 
+ + Args: + b (float): Rotation angle in radians. + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + cb, sb = np.cos(b), np.sin(b) + return np.array([ + [ cb, 0, sb], + [ 0, 1, 0 ], + [-sb, 0, cb], + ]) + + +def _Rx(g): + """Compute the rotation matrix around laboratory x-axis. + + Args: + g (float): Rotation angle in radians. + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + cg, sg = np.cos(g), np.sin(g) + return np.array([ + [1, 0, 0 ], + [0, cg, -sg], + [0, sg, cg], + ]) + + +def rotate_euler(a, b, g, rad=False): + """Compute the rotation matrix given Cardan angles (x-y-z). + + Args: + a (float): Alpha Euler angle. + b (float): Beta Euler angle. + g (float): Gamma Euler angle. + rad (bool): Whether the angles are in radians. Defaults to False (degrees). + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + if not rad: + a, b, g = np.radians([a, b, g]) + + A = _Rz(a) + B = _Ry(b) + G = _Rx(g) + + return A@B@G diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index ef7da4e1..7b822945 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +"""Wigner D-matrices and spherical harmonic transformations.""" + import sys import sympy as sp from .xyz_integrals_sym import xyz as xyzint @@ -14,90 +16,145 @@ yx,yy,yz = symbols('yx yy yz') zx,zy,zz = symbols('zx zy zz') + def real_Y_correct_phase(l, m, theta, phi): - # returns real spherical harmonic in Condon--Shortley phase convention - # (sympy's Znm uses some other convention) - ym1 = Ynm (l, -abs(m), theta, phi) - ym2 = Ynm_c(l, -abs(m), theta, phi) - if m==0: - return ym1 - elif m<0: - return sp.I / sp.sqrt(2) * (ym1 - ym2) - elif m>0: - return 1 / sp.sqrt(2) * (ym1 + ym2) + """Return real spherical harmonic in Condon--Shortley phase convention. + + Note: sympy's Znm uses a different convention. + + Args: + l (int): Orbital angular momentum quantum number. + m (int): Magnetic quantum number. 
+ theta (sympy.Symbol): Polar angle. + phi (sympy.Symbol): Azimuthal angle. + + Returns: + sympy.Expr: Real spherical harmonic expression. + """ + ym1 = Ynm (l, -abs(m), theta, phi) + ym2 = Ynm_c(l, -abs(m), theta, phi) + if m==0: + return ym1 + elif m<0: + return sp.I / sp.sqrt(2) * (ym1 - ym2) + elif m>0: + return 1 / sp.sqrt(2) * (ym1 + ym2) + def get_polynom_Y(l, m): - # rewrites a real spherical harmonic as a polynom of x,y,z - theta = Symbol("theta", real=True) - phi = Symbol("phi", real=True) - r = Symbol('r', nonnegative=True) - expr = real_Y_correct_phase(l,m, theta, phi) * r**l - expr = expand(expr, func=True) - expr = expr.rewrite(sp.cos)#.simplify().trigsimp() - expr = expand_trig(expr) - expr = cancel(expr) - expr = expr.subs({r: sp.sqrt(x*x+y*y+z*z), phi: sp.atan2(y,x), theta: sp.atan2(sp.sqrt(x*x+y*y),z)}) - if m!=0: - expr = cancel(expr).simplify() - expr = expr.subs({x*x+y*y: 1-z*z, - 3*x*x+3*y*y : 3-3*z*z }) - return expr + """Rewrite a real spherical harmonic as a polynomial of x, y, z. + + Args: + l (int): Orbital angular momentum quantum number. + m (int): Magnetic quantum number. + + Returns: + sympy.Expr: Polynomial expression in Cartesian coordinates. 
+ """ + theta = Symbol("theta", real=True) + phi = Symbol("phi", real=True) + r = Symbol('r', nonnegative=True) + expr = real_Y_correct_phase(l,m, theta, phi) * r**l + expr = expand(expr, func=True) + expr = expr.rewrite(sp.cos) # .simplify().trigsimp() + expr = expand_trig(expr) + expr = cancel(expr) + expr = expr.subs({r: sp.sqrt(x*x+y*y+z*z), phi: sp.atan2(y,x), theta: sp.atan2(sp.sqrt(x*x+y*y),z)}) + if m!=0: + expr = cancel(expr).simplify() + expr = expr.subs({x*x+y*y: 1-z*z, + 3*x*x+3*y*y : 3-3*z*z }) + return expr + def xyzint_wrapper(knm, integrals_xyz_dict): - k,n,m = knm - if k%2 or n%2 or m%2: - return 0 - else: - knm = tuple(sorted([k//2, n//2, m//2], reverse=True)) - if knm not in integrals_xyz_dict: - integrals_xyz_dict[knm] = xyzint(*knm) - return integrals_xyz_dict[knm] + """Compute xyz integrals with caching. + + Computes the integral of x^k * y^n * z^m over the unit sphere. + Integral is zero if any power is odd. + + Args: + knm (tuple): Tuple of three integers (k, n, m) representing powers. + integrals_xyz_dict (dict): Cache dictionary for computed integrals. + + Returns: + float or sympy.Expr: Integral value, or 0 if any power is odd. + """ + k,n,m = knm + if k%2 or n%2 or m%2: + return 0 + else: + knm = tuple(sorted([k//2, n//2, m//2], reverse=True)) + if knm not in integrals_xyz_dict: + integrals_xyz_dict[knm] = xyzint(*knm) + return integrals_xyz_dict[knm] + def product_Y(Y1,Y2): - # computes the product of two spherical harmonics - # and returns coefficients and a list of powers - prod = Y1 * Y2 - prod = prod.expand().cancel() - prod = poly(prod, gens=[x,y,z]) - return Matrix(prod.coeffs()), prod.monoms() + """Compute the product of two spherical harmonics. + Args: + Y1 (sympy.Expr): First spherical harmonic polynomial. + Y2 (sympy.Expr): Second spherical harmonic polynomial. 
-def print_wigner(D): - for l,d in enumerate(D): - for m1 in range(-l,l+1): - for m2 in range(-l,l+1): - print(f'D[{l}][{m1: d},{m2: d}] = {d[m1,m2]}') - print() + Returns: + tuple: A tuple containing: + - coefficients (sympy.Matrix): Coefficients of the product. + - monomials (list): List of monomial powers. + """ + prod = Y1 * Y2 + prod = prod.expand().cancel() + prod = poly(prod, gens=[x,y,z]) + return Matrix(prod.coeffs()), prod.monoms() -def compute_wigner(lmax): - Y = [ [0]*(2*l+1) for l in range(lmax+1)] - Y_rot = [ [0]*(2*l+1) for l in range(lmax+1)] - for l in range(lmax+1): - for m in range(-l,l+1): - # spherical harmonic - Y[l][m] = get_polynom_Y(l, m) - # rotated spherical harmonic - Y_rot[l][m] = Y[l][m].subs({x: x1, y:y1, z:z1}).subs({x1:xx*x+xy*y+xz*z, y1:yx*x+yy*y+yz*z, z1:zx*x+zy*y+zz*z}) +def print_wigner(D): + """Print Wigner D matrices in formatted output. + + Args: + D (list): List of Wigner D matrices for each l value. + """ + for l,d in enumerate(D): + for m1 in range(-l,l+1): + for m2 in range(-l,l+1): + print(f'D[{l}][{m1: d},{m2: d}] = {d[m1,m2]}') + print() - D = [zeros(2*l+1,2*l+1) for l in range(lmax+1)] - integrals_xyz_dict = {} - for l in range(lmax+1): - for m1 in range(-l,l+1): - for m2 in range(-l,l+1): - coefs, pows = product_Y(Y[l][m2], Y_rot[l][m1]) - mono_integrals = [xyzint_wrapper(p,integrals_xyz_dict) for p in pows] - D[l][m1,m2] = coefs.dot(mono_integrals).factor() .subs({zx**2+zy**2: 1-zz**2, xx**2+xy**2:1-xz**2, yx**2+yy**2:1-yz**2}).simplify() - return D +def compute_wigner(lmax): + """Compute Wigner D matrices for real spherical harmonics up to a maximum angular momentum. + + Args: + lmax (int): Maximum angular momentum quantum number. + + Returns: + list: List of Wigner D matrices (sympy.Matrix) for each l from 0 to lmax. 
+ """ + Y = [ [0]*(2*l+1) for l in range(lmax+1)] + Y_rot = [ [0]*(2*l+1) for l in range(lmax+1)] + for l in range(lmax+1): + for m in range(-l,l+1): + # spherical harmonic + Y[l][m] = get_polynom_Y(l, m) + # rotated spherical harmonic + Y_rot[l][m] = Y[l][m].subs({x: x1, y:y1, z:z1}).subs({x1:xx*x+xy*y+xz*z, y1:yx*x+yy*y+yz*z, z1:zx*x+zy*y+zz*z}) + + D = [zeros(2*l+1,2*l+1) for l in range(lmax+1)] + integrals_xyz_dict = {} + for l in range(lmax+1): + for m1 in range(-l,l+1): + for m2 in range(-l,l+1): + coefs, pows = product_Y(Y[l][m2], Y_rot[l][m1]) + mono_integrals = [xyzint_wrapper(p,integrals_xyz_dict) for p in pows] + D[l][m1,m2] = coefs.dot(mono_integrals).factor() .subs({zx**2+zy**2: 1-zz**2, xx**2+xy**2:1-xz**2, yx**2+yy**2:1-yz**2}).simplify() + return D if __name__ == "__main__": - if len(sys.argv)<2: - lmax = 2 - else: - lmax = int(sys.argv[1]) - - D = compute_wigner(lmax) - print_wigner(D) + if len(sys.argv)<2: + lmax = 2 + else: + lmax = int(sys.argv[1]) + D = compute_wigner(lmax) + print_wigner(D) diff --git a/qstack/mathutils/xyz_integrals_float.py b/qstack/mathutils/xyz_integrals_float.py index 95fae4a2..f0b4b2f4 100755 --- a/qstack/mathutils/xyz_integrals_float.py +++ b/qstack/mathutils/xyz_integrals_float.py @@ -1,37 +1,72 @@ #!/usr/bin/env python3 +"""Cartesian integrals for spherical harmonics (float version).""" + import sys -def xyz(n,m,k): -# computes the integral of x^2k y^2n z^2m over a sphere - k,n,m = sorted([k,n,m], reverse=True) - # k>=n>=m - if n==0: - xyz = 2.0 * (1.0 - (2.0*k-1.0)/(2.0*k+1.0)) - else: - xyz = (2*k-1) * I23(n,m,k) - return xyz + +def xyz(n, m, k): + """Compute the integral of x^2k y^2n z^2m over a unit sphere. + + Args: + n (int): Half of power of y. + m (int): Half of power of z. + k (int): Half of power of x. + + Note: + The argument order does not matter. + + Returns: + float: The value of the integral. 
+ """ + k,n,m = sorted([k,n,m], reverse=True) + if n==0: # both n and m are 0 + xyz = 2.0 * (1.0 - (2.0*k-1.0)/(2.0*k+1.0)) + else: + xyz = (2*k-1) * I23(n,m,k) + return xyz + def I23(n,m,k): - I23 = 0.0 - for l in range(n+m+2): - I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2.0*l+2.0*k-1.0) - I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) - for l in range(1, n+2): - I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) - return I23 - -def trinomial(k1,k2,k3): -# (k1+k2+k3)! / (k1! * k2! * k3!) - k1,k2,k3 = sorted([k1,k2,k3], reverse=True) - trinom = 1.0 - for k in range(1,k2+1): - trinom = trinom * (k+k1) / k - for k in range(1,k3+1): - trinom = trinom * (k+k1+k2) / k - return trinom + """Compute an auxiliary integral needed for the integral over the unit sphere. -if __name__ == "__main__": - k,n,m = map(int, sys.argv[1:4]) - print(f"{xyz(k,n,m):.15f} π") + Args: + n (int) + m (int) + k (int) + Returns: + float: The value of the integral. + """ + I23 = 0.0 + for l in range(n+m+2): + I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2.0*l+2.0*k-1.0) + I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) + for l in range(1, n+2): + I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) + return I23 + + +def trinomial(k1, k2, k3): + """Compute the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). + + Args: + k1 (int) + k2 (int) + k3 (int) + + Returns: + float: The value of the trinomial coefficient. 
+ """ + k1,k2,k3 = sorted([k1,k2,k3], reverse=True) + trinom = 1.0 + for k in range(1,k2+1): + trinom = trinom * (k+k1) / k + for k in range(1,k3+1): + trinom = trinom * (k+k1+k2) / k + return trinom + + +if __name__ == "__main__": + k,n,m = map(int, sys.argv[1:4]) + print(f"{xyz(k,n,m):.15f} π") diff --git a/qstack/mathutils/xyz_integrals_sym.py b/qstack/mathutils/xyz_integrals_sym.py index 0dd5381e..c1b07929 100755 --- a/qstack/mathutils/xyz_integrals_sym.py +++ b/qstack/mathutils/xyz_integrals_sym.py @@ -1,48 +1,81 @@ #!/usr/bin/env python3 -import sys +"""Cartesian integrals for spherical harmonics (symbolic version).""" +import sys try: import sympy except ImportError: print(""" - ERROR: cannot import sympy. Have you installed qstack with the \"wigner\" option?\n\n (for instance, with `pip install qstack[wigner]` or `pip install qstack[all]`) """) raise -def xyz(n,m,k): -# computes the integral of x^2k y^2n z^2m over a sphere - k,n,m = sorted([k,n,m], reverse=True) - # k>=n>=m - if n==0: - K = sympy.symbols('K') - xyz = (2 * (1 - (2*K-1)/(2*K+1))).subs(K,k) - else: - xyz = (2*k-1) * I23(n,m,k) - return xyz * sympy.pi + +def xyz(n, m, k): + """Compute the integral of x^2k y^2n z^2m over a unit sphere. + + Args: + n (int): Half of power of y. + m (int): Half of power of z. + k (int): Half of power of x. + + Note: + The argument order does not matter. + + Returns: + sympy.Expr: The value of the integral. + """ + k,n,m = sorted([k,n,m], reverse=True) + if n==0: # both n and m are 0 + K = sympy.symbols('K') + xyz = (2 * (1 - (2*K-1)/(2*K+1))).subs(K,k) + else: + xyz = (2*k-1) * I23(n,m,k) + return xyz * sympy.pi + def I23(n,m,k): - I23 = 0.0 - K = sympy.symbols('K') - for l in range(n+m+2): - I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2*l+2*K-1) - I23 = I23.subs(K,k) - I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) - for l in range(1, n+2): - I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) - return I23 - -def trinomial(k1,k2,k3): -# (k1+k2+k3)! / (k1! * k2! * k3!) 
- k1,k2,k3 = sorted([k1,k2,k3]) - trinom = sympy.FallingFactorial(k1+k2+k3, k3) / (sympy.factorial(k1)*sympy.factorial(k2)) - return trinom + """Compute an auxiliary integral needed for the integral over the unit sphere. -if __name__ == "__main__": - k,n,m = map(int, sys.argv[1:4]) - x = xyz(k,n,m) - print(f"{x:.15f} = {x}") + Args: + n (int) + m (int) + k (int) + + Returns: + sympy.Expr: The value of the integral. + """ + I23 = 0.0 + K = sympy.symbols('K') + for l in range(n+m+2): + I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2*l+2*K-1) + I23 = I23.subs(K,k) + I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) + for l in range(1, n+2): + I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) + return I23 + + +def trinomial(k1, k2, k3): + """Compute the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). + Args: + k1 (int) + k2 (int) + k3 (int) + + Returns: + sympy.Expr: The value of the trinomial coefficient. + """ + k1,k2,k3 = sorted([k1,k2,k3]) + trinom = sympy.FallingFactorial(k1+k2+k3, k3) / (sympy.factorial(k1)*sympy.factorial(k2)) + return trinom + + +if __name__ == "__main__": + k,n,m = map(int, sys.argv[1:4]) + x = xyz(k,n,m) + print(f"{x:.15f} = {x}") diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 27814ee5..a39ade1c 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -1,27 +1,31 @@ +"""ORCA quantum chemistry package I/O utilities. + +Read and parse ORCA output files, including orbitals and densities binary files. +""" + import warnings import struct import numpy as np import pyscf from qstack.mathutils.matrix import from_tril -from qstack.tools import reorder_ao +from qstack.reorder import reorder_ao +from qstack.tools import Cursor def read_input(fname, basis, ecp=None): - """Read the structure from an Orca input (XYZ coordinates in simple format only) + """Read the structure from an Orca input (XYZ coordinates in simple format only). - Note: we do not read basis set info from the file. - TODO: read also %coords block? 
+ Note: We do not read basis set info from the file. + TODO: Read also %coords block? Args: - fname (str) : path to file - basis (str/dict) : basis name, path to file, or dict in the pyscf format - Kwargs: - ecp (str) : ECP to use + fname (str): Path to Orca input file. + basis (str or dict): Basis name, path to file, or dict in the pyscf format. + ecp (str): Effective core potential to use. Defaults to None. Returns: - pyscf Mole object. + pyscf.gto.Mole: pyscf Mole object. """ - with open(fname) as f: lines = [x.strip() for x in f] @@ -52,19 +56,20 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re Tested on Orca versions 4.0, 4.2, and 5.0. Args: - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. basename (str): Job name (without extension). - Kwargs: - directory (str) : path to the directory with the density files. - version (int): ORCA version (400 for 4.0, 421 for 4.2, 500 for 5.0). - openshell (bool): If read spin density in addition to the electron density. - reorder_dest (str): Which AO ordering convention to use. + directory (str): Path to the directory with the density files. Defaults to './'. + version (int): ORCA version (400 for 4.0, 421 for 4.2, 500 for 5.0). Defaults to 500. + openshell (bool): Whether to read spin density in addition to electron density. Defaults to False. + reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. Returns: - A numpy 2darray containing the density matrix (openshell=False) - or a numpy 3darray containing the density and spin density matrices (openshell=True). - """ + numpy.ndarray: 2D array containing density matrix (openshell=False) or + 3D array containing density and spin density matrices (openshell=True). + Raises: + RuntimeError: If density matrix reordering is compromised for def2 basis with 3d elements. 
+ """ path = directory+'/'+basename if version < 500: if version==400: @@ -86,7 +91,6 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re else: dm = np.fromfile(path[0], offset=8, count=mol.nao*mol.nao*nspin).reshape((nspin,mol.nao,mol.nao)) - is_def2 = 'def2' in pyscf.gto.basis._format_basis_name(mol.basis) has_3d = np.any([21 <= pyscf.data.elements.charge(q) <= 30 for q in mol.elements]) if is_def2 and has_3d: @@ -104,22 +108,26 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re def _parse_gbw(fname): - """ Parse ORCA .gbw files. + """Parse ORCA .gbw files. Many thanks to https://pysisyphus.readthedocs.io/en/latest/_modules/pysisyphus/calculators/ORCA.html Args: - fname (str): path to the gbw file. + fname (str): Path to the gbw file. Returns: - numpy 3darray of (s,nao,nao) containing the density matrix - numpy 2darray of (s,nao) containing the MO energies - numpy 2darray of (s,nao) containing the MO occupation numbers - dict of {int : [int]} with a list of basis functions angular momenta - for each atom (not for element!) + tuple: A tuple containing: + - coefficients_ab (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. + - energies_ab (numpy.ndarray): 2D array of shape (s,nao) with MO energies. + - occupations_ab (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. + - ls (dict): Dictionary mapping atom index to list of basis function angular momenta. + s=1 for closed-shell and 2 for open-shell computation, + nao is the number of atomic/molecular orbitals. + + Raises: + RuntimeError: If number of MO sets is not 1 or 2. """ - def read_array(f, n, dtype): return np.frombuffer(f.read(dtype().itemsize * n), dtype=dtype) @@ -177,18 +185,20 @@ def read_basis(MAX_PRIMITIVES=37): def _get_indices(mol, ls_from_orca): - """ Get coefficient needed to reorder the AO read from Orca. + """Get coefficients needed to reorder the AO read from Orca. 
Args: - mol (pyscf Mole): pyscf Mole object. - ls_from_orca : dict of {int : [int]} with a list of basis functions - angular momenta for those atoms (not elements!) - whose basis functions are *not* sorted wrt to angular momenta. - The lists represent the Orca order. + mol (pyscf.gto.Mole): pyscf Mole object. + ls_from_orca (dict): Dictionary mapping atom index to list of basis function angular momenta + for atoms whose basis functions are NOT sorted wrt angular momenta. + The lists represent the Orca order. Returns: - numpy int 1darray of (nao,) containing the indices to be used as - c_reordered = c_orca[indices] + numpy.ndarray: 1D integer array of shape (nao,) containing reordering indices. + Use as: c_reordered = c_orca[indices] + + Raises: + RuntimeError: If AO reordering fails. """ if ls_from_orca is None: return None @@ -196,10 +206,9 @@ def _get_indices(mol, ls_from_orca): indices_full = np.arange(mol.nao) for iat, ls in ls_from_orca.items(): indices = [] - i = 0 + i = Cursor(action='ranger') for il, l in enumerate(ls): - indices.append((l, il, i + np.arange(2*l+1))) - i += 2*l+1 + indices.append((l, il, i(2*l+1))) indices = sorted(indices, key=lambda x: (x[0], x[1])) indices = np.array([j for i in indices for j in i[2]]) atom_slice = np.s_[ao_limits[iat][0]:ao_limits[iat][1]] @@ -210,18 +219,14 @@ def _get_indices(mol, ls_from_orca): def reorder_coeff_inplace(c_full, mol, reorder_dest='pyscf', ls_from_orca=None): - """ Reorder coefficient read from ORCA .gbw + """Reorder coefficients read from ORCA .gbw in-place. Args: - c_full : numpy 3darray of (s,nao,nao) containing the MO coefficients - to reorder - mol (pyscf Mole): pyscf Mole object. - Kwargs: - reorder_dest (str): Which AO ordering convention to use. - ls_from_orca : dict of {int : [int]} with a list of basis functions - angular momenta for those atoms (not elements!) - whose basis functions are *not* sorted wrt to angular momenta. - The lists represent the Orca order. 
+ c_full (numpy.ndarray): 3D array of shape (s,nao,nao) containing MO coefficients to reorder. + mol (pyscf.gto.Mole): pyscf Mole object. + reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. + ls_from_orca (dict): Dictionary mapping atom index to list of basis function angular momenta + for atoms whose basis functions are NOT sorted wrt angular momenta. Defaults to None. """ def _reorder_coeff(c): # In ORCA, at least def2-SVP and def2-TZVP for 3d metals @@ -242,19 +247,22 @@ def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): Limited for Orca version 4.0 (cannot read the basis set). Args: - mol (pyscf Mole): pyscf Mole object. - fname (str): path to the gbw file. - Kwargs: - reorder_dest (str): Which AO ordering convention to use. - sort_l (bool): if sort the basis functions wrt angular momenta. - e.g. PySCF requires them sorted. + mol (pyscf.gto.Mole): pyscf Mole object. + fname (str): Path to the gbw file. + reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. + sort_l (bool): Whether to sort basis functions wrt angular momenta. + PySCF requires them sorted. Defaults to True. Returns: - numpy 3darray of (s,nao,nao) containing the MO coefficients - numpy 2darray of (s,nao) containing the MO energies - numpy 2darray of (s,nao) containing the MO occupation numbers - s is 1 for closed-shell and 2 for open-shell computation. - nao is number of atomic/molecular orbitals. + tuple: A tuple containing: + - c (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. + - e (numpy.ndarray): 2D array of shape (s,nao) with MO energies. + - occ (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. + Where s is 1 for closed-shell and 2 for open-shell computation, + and nao is the number of atomic/molecular orbitals. + + Raises: + RuntimeError: If basis set information not found and sort_l=True. 
""" c, e, occ, ls = _parse_gbw(fname) if not ls and sort_l: @@ -268,4 +276,3 @@ def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): if reorder_dest is not None: reorder_coeff_inplace(c, mol, reorder_dest, ls if (ls and sort_l) else None) return c, e, occ - diff --git a/qstack/qml/__init__.py b/qstack/qml/__init__.py index 6242d154..27ad73c6 100644 --- a/qstack/qml/__init__.py +++ b/qstack/qml/__init__.py @@ -1,3 +1,5 @@ +"""Quantum Machine Learning representations module.""" + from . import b2r2 from . import slatm diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 0d5e1763..6a27b14e 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -1,28 +1,61 @@ +"""Bond-based reaction representation (B2R2) for chemical reactions. + +Provides: + defaults: default parameters for B2R2 computation. +""" + import itertools from types import SimpleNamespace import numpy as np from scipy.special import erf +from tqdm import tqdm defaults = SimpleNamespace(rcut=3.5, gridspace=0.03) def get_bags(unique_ncharges): + """Generate all unique element pair combinations including self-interactions. + + Args: + unique_ncharges (array-like): Array of unique atomic charges/numbers. + + Returns: + list: List of all unique element pairs [Z_i, Z_j] including self-interactions. + """ combs = list(itertools.combinations(unique_ncharges, r=2)) combs = [list(x) for x in combs] - # add self interaction self_combs = [[x, x] for x in unique_ncharges] combs += self_combs return combs def get_mu_sigma(R): + """Get Gaussian distribution parameters from interatomic distance. + + The constants used here are taken from the original B2R2 implementation. + + Args: + R (float): Interatomic distance. + + Returns: + tuple: Mean (mu) and standard deviation (sigma) for the Gaussian distribution. + """ mu = R * 0.5 sigma = R * 0.125 return mu, sigma def get_gaussian(x, R): + """Compute Gaussian function values for a given interatomic distance. 
+ + Args: + x (numpy ndarray): Grid points to evaluate the Gaussian. + R (float): Interatomic distance determining the Gaussian parameters. + + Returns: + numpy ndarray: Gaussian function values at the grid points. + """ mu, sigma = get_mu_sigma(R) X = (x-mu) / (sigma*np.sqrt(2)) g = np.exp(-X**2) / (np.sqrt(2*np.pi) * sigma) @@ -30,6 +63,17 @@ def get_gaussian(x, R): def get_skew_gaussian_l_both(x, R, Z_I, Z_J): + """Compute skewed Gaussian distributions for B2R2_l representation. + + Args: + x (numpy ndarray): Grid points to evaluate the functions. + R (float): Interatomic distance. + Z_I (int): Atomic number of atom I. + Z_J (int): Atomic number of atom J. + + Returns: + tuple: Two skewed Gaussian distributions (a, b) for the atom pair. + """ mu, sigma = get_mu_sigma(R) # a = Z_J * scipy.stats.skewnorm.pdf(x, Z_J, mu, sigma) # b = Z_I * scipy.stats.skewnorm.pdf(x, Z_I, mu, sigma) @@ -45,6 +89,17 @@ def get_skew_gaussian_l_both(x, R, Z_I, Z_J): def get_skew_gaussian_n_both(x, R, Z_I, Z_J): + """Compute combined skewed Gaussian distribution for B2R2_n representation. + + Args: + x (numpy ndarray): Grid points to evaluate the function. + R (float): Interatomic distance. + Z_I (int): Atomic number of atom I. + Z_J (int): Atomic number of atom J. + + Returns: + numpy ndarray: Combined skewed Gaussian distribution for the atom pair. + """ mu, sigma = get_mu_sigma(R) # a = Z_I * scipy.stats.skewnorm.pdf(x, Z_J, mu, sigma) # b = Z_J * scipy.stats.skewnorm.pdf(x, Z_I, mu, sigma) @@ -61,7 +116,18 @@ def get_skew_gaussian_n_both(x, R, Z_I, Z_J): def get_b2r2_n_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): + """Compute B2R2_n representation for a single molecule. + + Args: + ncharges (array-like): Atomic numbers for all atoms in the molecule. + coords (array-like): Atomic coordinates in Å, shape (natom, 3). + elements (array-like): Unique atomic numbers present in the dataset. + rcut (float): Cutoff radius for bond detection in Å. 
Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + Returns: + numpy.ndarray: B2R2_n representation (ngrid,). + """ idx_relevant_atoms = np.where(np.sum(np.array(ncharges)==np.array(elements)[:,None], axis=0)) ncharges = np.array(ncharges)[idx_relevant_atoms] coords = np.array(coords)[idx_relevant_atoms] @@ -82,7 +148,18 @@ def get_b2r2_n_molecular(ncharges, coords, elements, def get_b2r2_a_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): + """Compute B2R2_a representation for a single molecule. + + Args: + ncharges (array-like): Atomic numbers for all atoms in the molecule. + coords (array-like): Atomic coordinates in Å, shape (natom, 3). + elements (array-like): Unique atomic numbers present in the dataset. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + Returns: + numpy.ndarray: B2R2_a representation (n_pairs*ngrid,). + """ idx_relevant_atoms = np.where(np.sum(np.array(ncharges)==np.array(elements)[:,None], axis=0)) ncharges = np.array(ncharges)[idx_relevant_atoms] coords = np.array(coords)[idx_relevant_atoms] @@ -100,7 +177,7 @@ def get_b2r2_a_molecular(ncharges, coords, elements, coords_b = coords[j] R = np.linalg.norm(coords_b - coords_a) if R < rcut: - twobodyrep[bag_idx[(ncharge_a, ncharge_b)]] += get_gaussian(grid, R) + twobodyrep[bag_idx[ncharge_a, ncharge_b]] += get_gaussian(grid, R) twobodyrep = 2.0*np.concatenate(twobodyrep) return twobodyrep @@ -108,7 +185,18 @@ def get_b2r2_a_molecular(ncharges, coords, elements, def get_b2r2_l_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): + """Compute B2R2_l representation for a single molecule. + + Args: + ncharges (array-like): Atomic numbers for all atoms in the molecule. + coords (array-like): Atomic coordinates in Å, shape (natom, 3). 
+ elements (array-like): Unique atomic numbers present in the dataset. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + Returns: + numpy.ndarray: B2R2_l representation (n_elements*ngrid,). + """ idx_relevant_atoms = np.where(np.sum(np.array(ncharges)==np.array(elements)[:,None], axis=0)) ncharges = np.array(ncharges)[idx_relevant_atoms] coords = np.array(coords)[idx_relevant_atoms] @@ -134,6 +222,35 @@ def get_b2r2_l_molecular(ncharges, coords, elements, def get_b2r2(reactions, variant='l', progress=False, rcut=defaults.rcut, gridspace=defaults.gridspace): + """High-level interface for computing bond-based reaction representations (B2R2). + + Reference: + P. van Gerwen, A. Fabrizio, M. D. Wodrich, C. Corminboeuf, + "Physics-based representations for machine learning properties of chemical reactions", + Mach. Learn.: Sci. Technol. 3, 045005 (2022), doi:10.1088/2632-2153/ac8f1a. + + Args: + reactions (List[rxn]): List of reaction objects with attributes: + - rxn.reactants (List[Mol]): List of reactant molecules. + - rxn.products (List[Mol]): List of product molecules. + Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + variant (str): B2R2 variant to compute. Options: + - 'l': Local variant with element-resolved skewed Gaussians (default). + - 'a': Agnostic variant with element-pair Gaussians. + - 'n': Nuclear variant with combined skewed Gaussians. + progress (bool): If True, displays progress bar. Defaults to False. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + + Returns: + numpy.ndarray: B2R2 representations of shape (n_reactions, n_features). + For variants 'l' and 'a', returns difference (products - reactants). + For variant 'n', returns concatenation [reactants, products]. 
+ + Raises: + RuntimeError: If an unknown variant is specified. + """ if variant=='l': get_b2r2_molecular=get_b2r2_l_molecular combine = lambda r,p: p-r @@ -153,37 +270,35 @@ def get_b2r2_inner(reactions, progress=False, rcut=defaults.rcut, gridspace=defaults.gridspace, get_b2r2_molecular=None, combine=None): + """Compute the B2R2 representations for a list of reactions. - """ Computes the B2R2 representations for a list of reactions. - - Reference: - P. van Gerwen, A. Fabrizio, M. D. Wodrich, C. Corminboeuf, - "Physics-based representations for machine learning properties of chemical reactions", - Mach. Learn.: Sci. Technol. 3, 045005 (2022), doi:10.1088/2632-2153/ac8f1a. + Internal implementation function that computes B2R2 representations using + provided molecular representation function and combination strategy. + Automatically determines element set from all reactant molecules. Args: - reactions (List[rxn]): a list of rxn objects containing reaction information. - rxn.reactants (List[ase.Atoms]) is a list of reactants (ASE molecules), - rxn.products (List[ase.Atoms]) is a list of products. - rcut (float): cutoff radius (Å) - gridspace (float): grid spacing (Å) - get_b2r2_molecular (func): function to compute the molecular representations, - i.e. one of `get_b2r2_{l,n,a}_molecular` - combine (func(r: ndarray, p: ndarray)): function to combine the reactants and products representations, - e.g. difference or concatenation + reactions (List[rxn]): List of reaction objects with attributes: + - rxn.reactants (List[Mol]): List of reactant molecules. + - rxn.products (List[Mol]): List of product molecules. + Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + progress (bool): If True, displays progress bar. Defaults to False. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. 
+ get_b2r2_molecular (callable): Function to compute molecular representations. + Should be one of get_b2r2_{l,n,a}_molecular. + combine (callable): Function(r: ndarray, p: ndarray) -> ndarray to combine + reactant and product representations (e.g., difference or concatenation). + Returns: - ndrarray containing the B2R2 representation for each reaction + numpy.ndarray: B2R2 representations of shape (n_reactions, n_features), + where each row represents a reaction according to the combine function. """ - qs = [mol.numbers for rxn in reactions for mol in rxn.reactants] elements = np.unique(np.concatenate(qs)) - if progress: - import tqdm - reactions = tqdm.tqdm(reactions) - b2r2_diff = [] - for reaction in reactions: + for reaction in tqdm(reactions, disable=not progress): b2r2_reactants, b2r2_products = [ sum(get_b2r2_molecular(mol.numbers, mol.positions, rcut=rcut, diff --git a/qstack/qml/slatm.py b/qstack/qml/slatm.py index 11997fd4..d1386753 100644 --- a/qstack/qml/slatm.py +++ b/qstack/qml/slatm.py @@ -1,13 +1,32 @@ +"""Spectrum of London and Axilrod-Teller-Muto potential (SLATM) representation. + +Provides: + defaults: Default parameters for SLATM representation. +""" + import itertools from types import SimpleNamespace import numpy as np +from tqdm import tqdm +from qstack.mathutils.array import stack_padding, vstack_padding defaults = SimpleNamespace(sigma2=0.05, r0=0.1, rcut=4.8, dgrid2=0.03, theta0=20.0*np.pi/180.0, sigma3=0.05, dgrid3=0.03) def get_mbtypes(qs, qml=False): + """Generate many-body types (elements, pairs, triples) for SLATM representation. + + Args: + qs (list): List of atomic number arrays for all molecules. + qml (bool): If True, uses set ordering (QML-compatible). If False, uses sorted ordering. Defaults to False. 
+ Returns: + dict: Dictionary with keys 1, 2, 3 containing: + - 1: Array of unique elements + - 2: List of element pairs (including self-pairs) + - 3: List of valid element triples + """ # all the elements elements = itertools.chain.from_iterable(list(i) for i in qs) if qml: @@ -32,19 +51,27 @@ def get_mbtypes(qs, qml=False): return {1: elements, 2: pairs, 3: triples} -def pad_zeros(slatm): - n_features = np.array([x.shape[-1] for x in slatm]) - pad_sizes = max(n_features)-n_features - for i in range(len(slatm)): - if pad_sizes[i]: - slatm[i] = np.pad(slatm[i], (0, pad_sizes[i])) - return slatm - - def get_two_body(i, mbtype, q, dist, r0=defaults.r0, rcut=defaults.rcut, sigma=defaults.sigma2, dgrid=defaults.dgrid2): + """Compute two-body London dispersion contribution for atom i. + + Evaluates the two-body term from pairwise 1/r^6 London dispersion interactions, + projected onto a radial grid with Gaussian broadening of interatomic distances. + + Args: + i (int): Index of the central atom. + mbtype (tuple): Element pair (q1, q2) defining the two-body interaction type. + q (numpy.ndarray): Array of atomic numbers for all atoms in molecule. + dist (numpy.ndarray): Pairwise distance matrix (natom,natom) in Å. + r0 (float): Minimum radial distance for grid. Defaults to 0.1 Å. + rcut (float): Radial cutoff distance. Defaults to 4.8 Å. + sigma (float): Gaussian width for distance broadening. Defaults to 0.05 Å. + dgrid (float): Grid spacing for radial discretization. Defaults to 0.03 Å. + Returns: + numpy.ndarray: Two-body contribution on radial grid (ngrid,). + """ ngrid = int((rcut - r0)/dgrid) + 1 rgrid = np.linspace(r0, rcut, ngrid) @@ -73,12 +100,40 @@ def get_two_body(i, mbtype, q, dist, return 0.5 * dgrid * london * deltas - def get_three_body(j, mbtype, q, r, dist, rcut=defaults.rcut, theta0=defaults.theta0, sigma=defaults.sigma3, dgrid=defaults.dgrid3): + """Compute three-body Axilrod-Teller-Muto contribution for atom j. 
+ + Evaluates the three-body ATM term from triple-dipole interactions, + projected onto an angular grid with Gaussian broadening of bond angles. + + Args: + j (int): Index of the central atom in the triplet. + mbtype (tuple): Element triple (q1, q2, q3) defining the three-body interaction type. + q (numpy.ndarray): Array of atomic numbers for all atoms. + r (numpy.ndarray): Atomic position array (natom,3) in Å. + dist (numpy.ndarray): Pairwise distance matrix (natom,natom) in Å. + rcut (float): Distance cutoff for triplet formation. Defaults to 4.8 Å. + theta0 (float): Margin for angular grid in radians. Defaults to 20°. + sigma (float): Gaussian width for angle broadening in radians. Defaults to 0.05. + dgrid (float): Grid spacing for angular discretization in radians. Defaults to 0.03. + + Returns: + numpy.ndarray: Three-body contribution on angular grid (ngrid,). + """ def get_cos(a, b, c): + """Compute cosine of angle abc from atomic positions. + + Args: + a (int): Index of first atom. + b (int): Index of vertex atom. + c (int): Index of third atom. + + Returns: + float: Cosine of angle abc. + """ v1 = r[a] - r[b] v2 = r[c] - r[b] return v1 @ v2 / (dist[a,b] * dist[b,c]) @@ -117,19 +172,53 @@ def get_cos(a, b, c): return spectrum * dgrid * q1 * q2 * q3 / 3.0 - def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, global_repr=False, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): + """Compute SLATM representation for a single molecule. + + Constructs the SLATM (Spectrum of London and Axilrod-Teller-Muto potential) + representation by combining one-body (nuclear charges), two-body (London dispersion), + and three-body (Axilrod-Teller-Muto) contributions. + Reference: + B. Huang, O. A. von Lilienfeld, + "Quantum machine learning using atom-in-molecule-based fragments selected on the fly", + Nat. Chem. 
12, 945–951 (2020), doi:10.1038/s41557-020-0527-z. + + Args: + q (numpy.ndarray): Array of atomic numbers (natom,). + r (numpy.ndarray): Atomic position array (natom,3) in Å. + mbtypes (dict): Many-body types from get_mbtypes with keys 1, 2, 3. + qml_compatible (bool): If True, maintains QML package compatibility. + If False, uses condensed representation (less 0s). Defaults to True. + Is set to True if global_repr is True. + stack_all (bool): If True, stacks all representations into one array. + Defaults to True. + global_repr (bool): If True, returns molecular SLATM (sum over atoms). + If False, returns atomic SLATM. Defaults to False. + r0 (float): Minimum radial distance for 2-body grid. Defaults to 0.1 Å. + rcut (float): Radial cutoff for 2-body and 3-body terms. Defaults to 4.8 Å. + sigma2 (float): Gaussian width for 2-body term. Defaults to 0.05 Å. + dgrid2 (float): Grid spacing for 2-body term. Defaults to 0.03 Å. + theta0 (float): Minimum angle for 3-body grid in radians. Defaults to 20°. + sigma3 (float): Gaussian width for 3-body term in radians. Defaults to 0.05. + dgrid3 (float): Grid spacing for 3-body term in radians. Defaults to 0.03. + + Returns: + numpy.ndarray or dict: SLATM representation. + - If stack_all=True and global_repr=False, numpy ndarray of shape (natom,n_features). + - If global_repr=True, numpy ndarray of shape (n_features,). + - If stack_all=False, returns dict with keys 1, 2, 3 containing lists of numpy ndarrays. 
+ """ # for global representation, qml_compatible should be True qml_compatible = qml_compatible or global_repr natoms = len(q) dist = np.zeros((natoms, natoms)) - for (i,j) in itertools.product(range(natoms), range(natoms)): - dist[i,j] = np.linalg.norm(r[i]-r[j]) + for (i, j) in itertools.combinations_with_replacement(range(natoms), 2): + dist[i,j] = dist[j,i] = np.linalg.norm(r[i]-r[j]) slatm = [] for i, qi in enumerate(q): @@ -167,9 +256,7 @@ def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, slatm.append({1: slatm1b, 2: slatm2b, 3: slatm3b}) if stack_all or global_repr: - if not qml_compatible: - slatm = pad_zeros(slatm) - slatm = np.vstack(slatm) + slatm = stack_padding(slatm) if global_repr: slatm = np.sum(slatm, axis=0) @@ -177,43 +264,44 @@ def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, return slatm - def get_slatm_for_dataset(molecules, progress=False, global_repr=False, qml_mbtypes=True, qml_compatible=True, stack_all=True, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """ Computes the (a)SLATM representation for a set of molecules. + """Compute the (a)SLATM representation for a set of molecules. - Reference: - B. Huang, O. A. von Lilienfeld, - "Quantum machine learning using atom-in-molecule-based fragments selected on the fly", - Nat. Chem. 12, 945–951 (2020), doi:10.1038/s41557-020-0527-z. + Generates SLATM descriptors for molecular datasets, automatically determining + many-body types from all molecules. Args: - molecules (Union(List[ase.Atoms], List[str]): pre-loaded ASE molecules or paths to the xyz files. - Alternatively, a list of any objects providing fields .numbers and .positions (Å) - global_repr (bool): return molecular SLATM if True, return atomic SLATM (aSLATM) if False - qml_mbtypes (bool): if True, mbtypes order should be identical as from QML (https://www.qmlcode.org/). 
- if False, the elements are sorted thus mbtype order can differ from QML in some cases - qml_compatible (bool): if False, the local representation (global_repr=False) is condensed - stack_all (bool): if stack the representations into one big ndarray - - rcut (float): radial cutoff (Å) for the 2- and 3-body terms - r0 (float): grid range parameter (Å) [r0, rcut] for the 2-body term - sigma2 (float): gaussian width for the 2-body term (Å) - dgrid2 (float): grid spacing for the 2-body term (Å) - theta0 (float): grid range parameter (°) [theta0, 180-theta0] for the 3-body term - sigma3 (float): gaussian width for the 3-body term (°) - dgrid3 (float): grid spacing for the 3-body term (°) - - progress (bool): if print progress bar + molecules (Union[List[Mol], List[str]]): Pre-loaded molecules or paths + to XYZ files. Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + progress (bool): If True, displays progress bar. Defaults to False. + global_repr (bool): If True, returns molecular SLATM (sum over atoms). + If False, returns atomic SLATM (aSLATM). Defaults to False. + qml_mbtypes (bool): If True, uses element ordering compatible with QML package + (https://www.qmlcode.org/). If False, uses sorted ordering. Defaults to True. + qml_compatible (bool): If False, uses condensed representation for local + (global_repr=False) mode. Defaults to True. + stack_all (bool): If True, stacks representations into one array. Defaults to True. + r0 (float): Minimum radial distance for 2-body grid in Å. Defaults to 0.1. + rcut (float): Radial cutoff for 2-body and 3-body terms in Å. Defaults to 4.8. + sigma2 (float): Gaussian width for 2-body term in Å. Defaults to 0.05. + dgrid2 (float): Grid spacing for 2-body term in Å. Defaults to 0.03. + theta0 (float): Minimum angle for 3-body grid in radians. Defaults to 20° (π/9). + sigma3 (float): Gaussian width for 3-body term in radians. Defaults to 0.05. 
+ dgrid3 (float): Grid spacing for 3-body term in radians. Defaults to 0.03. Returns: - ndrarray or List[List[ndarray]] containing the SLATM representation for each molecule. + numpy.ndarray or List[List[numpy.ndarray]]: SLATM representations for all molecules. + - If stack_all=True and global_repr=True, np.ndarray of shape (n_molecules, n_features), + - If stack_all=True and global_repr=False, np.ndarray of shape (n_atoms_total, n_features), + - If stack_all=False and global_repr=True, list of np.ndarrays of shape (n_features,) per molecule, + - If stack_all=False and global_repr=False, list of lists of dicts per molecule with keys (1,2,3). """ - if isinstance(molecules[0], str): import ase.io molecules = [ase.io.read(xyz) for xyz in molecules] @@ -221,30 +309,27 @@ def get_slatm_for_dataset(molecules, qs = [mol.numbers for mol in molecules] mbtypes = get_mbtypes(qs, qml=qml_mbtypes) - if progress: - import tqdm - molecules = tqdm.tqdm(molecules) - slatm = [get_slatm(mol.numbers, mol.positions, mbtypes, global_repr=global_repr, qml_compatible=qml_compatible, stack_all=stack_all, r0=r0, rcut=rcut, sigma2=sigma2, dgrid2=dgrid2, theta0=theta0, sigma3=sigma3, dgrid3=dgrid3) - for mol in molecules] + for mol in tqdm(molecules, disable=not progress)] if stack_all: - if not qml_compatible: - slatm = pad_zeros(slatm) - slatm = np.vstack(slatm) + slatm = vstack_padding(slatm) return slatm - def get_slatm_rxn(reactions, progress=False, qml_mbtypes=True, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """ Computes the SLATM_d representation for a list of reactions. + """Compute the SLATM_d representation for chemical reactions. + + Calculates reaction representations as the difference between product and reactant + SLATM descriptors (ΔR = R_products - R_reactants), suitable for predicting + reaction properties like barriers and energies. Reference: P. van Gerwen, A. 
Fabrizio, M. D. Wodrich, C. Corminboeuf, @@ -252,35 +337,31 @@ def get_slatm_rxn(reactions, progress=False, qml_mbtypes=True, Mach. Learn.: Sci. Technol. 3, 045005 (2022), doi:10.1088/2632-2153/ac8f1a. Args: - reactions (List[rxn]): a list of rxn objects containing reaction information. - rxn.reactants (List[ase.Atoms]) is a list of reactants (ASE molecules), - rxn.products (List[ase.Atoms]) is a list of products. - qml_mbtypes (bool): if True, mbtypes order should be identical as from QML (https://www.qmlcode.org/). - if False, the elements are sorted thus mbtype order can differ from QML in some cases - - rcut (float): radial cutoff (Å) for the 2- and 3-body terms - r0 (float): grid range parameter (Å) [r0, rcut] for the 2-body term - sigma2 (float): gaussian width for the 2-body term (Å) - dgrid2 (float): grid spacing for the 2-body term (Å) - theta0 (float): grid range parameter (°) [theta0, 180-theta0] for the 3-body term - sigma3 (float): gaussian width for the 3-body term (°) - dgrid3 (float): grid spacing for the 3-body term (°) - - progress (bool): if print progress bar + reactions (List[rxn]): List of reaction objects with attributes: + - rxn.reactants (List[Mol]): List of reactant molecules. + - rxn.products (List[Mol]): List of product molecules. + Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + progress (bool): If True, displays progress bar. Defaults to False. + qml_mbtypes (bool): If True, uses element ordering compatible with QML package + (https://www.qmlcode.org/). If False, uses sorted ordering. Defaults to True. + r0 (float): Minimum radial distance for 2-body grid in Å. Defaults to 0.1. + rcut (float): Radial cutoff for 2-body and 3-body terms in Å. Defaults to 4.8. + sigma2 (float): Gaussian width for 2-body term in Å. Defaults to 0.05. + dgrid2 (float): Grid spacing for 2-body term in Å. Defaults to 0.03. + theta0 (float): Minimum angle for 3-body grid in radians. Defaults to 20° (π/9). 
+ sigma3 (float): Gaussian width for 3-body term in radians. Defaults to 0.05. + dgrid3 (float): Grid spacing for 3-body term in radians. Defaults to 0.03. Returns: - ndrarray containing the SLATM_d representation for each reaction + numpy.ndarray: SLATM_d difference representations of shape (n_reactions, n_features), + where each row is the difference between product and reactant SLATM vectors. """ - qs = [mol.numbers for rxn in reactions for mol in rxn.reactants] mbtypes = get_mbtypes(qs, qml=qml_mbtypes) - if progress: - import tqdm - reactions = tqdm.tqdm(reactions) - slatm_diff = [] - for reaction in reactions: + for reaction in tqdm(reactions, disable=not progress): slatm_reactants, slatm_products = [ sum(get_slatm(mol.numbers, mol.positions, mbtypes, global_repr=True, stack_all=True, diff --git a/qstack/regression/__init__.py b/qstack/regression/__init__.py index 5efafc29..e717b304 100644 --- a/qstack/regression/__init__.py +++ b/qstack/regression/__init__.py @@ -1,13 +1,13 @@ +"""Kernel Ridge Regression (KRR) module.""" + try: import sklearn del sklearn except ImportError: print(""" - ERROR: cannot import scikit-learn. Have you installed qstack with the \"regression\" option?\n\n (for instance, with `pip install qstack[regression] or `pip install qstack[all]``) """) raise - from . 
import kernel_utils diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index 71d80240..e029d659 100644 --- a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -1,3 +1,5 @@ +"""Kernel matrix condition number.""" + import numpy as np from qstack.mathutils.fps import do_fps from qstack.tools import correct_num_threads @@ -9,26 +11,28 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, idx_test=None, idx_train=None, sparse=None, random_state=defaults.random_state): - """ Compute kernel matrix condition number + """Compute kernel matrix condition number. Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - read_kernel (bool): if 'X' is a kernel and not an array of representations - sigma (float): width of the kernel - eta (float): regularization strength for matrix inversion - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sigma (float): Width of the kernel. + eta (float): Regularization strength for matrix inversion. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). 
+ gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + sparse (int): The number of reference environments to consider for sparse regression. Returns: - float : condition number - """ + float: Condition number. + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. + """ idx_train, _, _, _ = train_test_split_idx(y=np.arange(len(X)), idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) if read_kernel is False: @@ -52,12 +56,13 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, def main(): + """Command-line entry point for computing kernel matrix condition numbers.""" parser = RegressionParser(description='This program computes the condition number for the kernel matrix.', hyperparameters_set='single') parser.remove_argument('prop') parser.remove_argument('train_size') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) c = condition(X, read_kernel=args.readk, sigma=args.sigma, eta=args.eta, diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index 3c77150e..50dc3bd1 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -1,3 +1,5 @@ +"""Cross-validation results.""" + import numpy as np from tqdm import tqdm from qstack.tools import correct_num_threads @@ -8,35 +10,35 @@ def cv_results(X, y, - sigmaarr=defaults.sigmaarr, etaarr=defaults.etaarr, gkernel=defaults.gkernel, - gdict=defaults.gdict, akernel=defaults.kernel, test_size=defaults.test_size, +
sigmaarr=defaults.sigmaarr, etaarr=defaults.etaarr, akernel=defaults.kernel, + gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, train_size=defaults.train_size, splits=defaults.splits, printlevel=0, adaptive=False, read_kernel=False, n_rep=defaults.n_rep, save=False, preffix='unknown', save_pred=False, progress=False, sparse=None, seed0=0): - """ Computes various learning curves (LC) ,with random sampling, and returns the average performance. + """Compute various learning curves (LC) with random sampling and return the average performance. Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - sigmaar (list): list of kernel widths for the hyperparameter optimization - etaar (list): list of regularization strength for the hyperparameter optimization - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - akernel (str): local kernel (Laplacian, Gaussian, linear) - test_size (float or int): test set fraction (or number of samples) - train_size (list): list of training set size fractions used to evaluate the points on the LC - splits (int): K number of splits for the Kfold cross-validation - printlevel (int): controls level of output printing - adaptative (bool): to expand the grid for optimization adaptatively - read_kernel (bool): if 'X' is a kernel and not an array of representations - n_rep (int): the number of repetition for each point (using random sampling) - save (bool): wheather to save intermediate LCs (.npy) - preffix (str): the prefix to use for filename when saving intemediate results - save_pred (bool): to save predicted targets for all LCs (.npy) - progress (bool): to print a progress bar - sparse (int): the number of reference environnments to consider for sparse regression - seed0 (int): the initial seed to produce a set of seeds used for random number
generator + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + sigmaarr (list): List of kernel widths for the grid search. + etaarr (list): List of regularization strengths for the grid search. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + train_size (list): List of training set size fractions used to evaluate the points on the LC. + splits (int): K number of splits for the Kfold cross-validation. + printlevel (int): Controls level of output printing. + adaptive (bool): To expand the grid for optimization adaptively. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + n_rep (int): The number of repetitions for each point (using random sampling). + save (bool): Whether to save intermediate LCs (.npy). + preffix (str): The prefix to use for the filename when saving intermediate results. + save_pred (bool): To save predicted targets for all LCs (.npy). + progress (bool): To print a progress bar. + sparse (int): The number of reference environments to consider for sparse regression. + seed0 (int): The initial seed to produce a set of seeds used for the random number generator. Returns: The averaged LC data points as a numpy.ndarray containing (train sizes, MAE, std) @@ -77,14 +79,15 @@ def cv_results(X, y, np.save(f"{preffix}_{n_rep}-lc-runs.npy", lc_runs) if save_pred: np_pred = np.array(predictions_n) - ##### Can not take means !!! Test-set varies with run ! - ##### pred_mean = np.concatenate([np_pred.mean(axis=0),np_pred.std(axis=0)[1].reshape((1,-1))], axis=0) + # Can not take means !!! Test-set varies with run !
+ # pred_mean = np.concatenate([np_pred.mean(axis=0),np_pred.std(axis=0)[1].reshape((1,-1))], axis=0) pred_mean = np.concatenate([*np_pred.reshape((n_rep, 2, -1))], axis=0) np.savetxt(f"{preffix}_{n_rep}-predictions.txt", pred_mean.T) return lc def main(): + """Command-line entry point for full cross-validation with hyperparameter search.""" parser = RegressionParser(description='This program runs a full cross-validation of the learning curves (hyperparameters search included).', hyperparameters_set='array') parser.remove_argument('random_state') parser.add_argument('--n', type=int, dest='n_rep', default=defaults.n_rep, help='the number of repetition for each point') @@ -92,14 +95,14 @@ def main(): parser.add_argument('--save-pred', action='store_true', dest='save_pred', default=False, help='if save test-set prediction') args = parser.parse_args() - if(args.readk): + if args.readk: args.sigma = [np.nan] - if(args.ll): + if args.ll: correct_num_threads() + print(vars(args)) X = np.load(args.repr) y = np.loadtxt(args.prop) - print(vars(args)) final = cv_results(X, y, sigmaarr=args.sigma, etaarr=args.eta, gdict=args.gdict, gkernel=args.gkernel, akernel=args.akernel, read_kernel=args.read_kernel, diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 246167c7..0cf2ed23 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -1,3 +1,5 @@ +"""Final error computation on test sets.""" + import sys import numpy as np import scipy @@ -12,29 +14,32 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, test_size=defaults.test_size, idx_test=None, idx_train=None, sparse=None, random_state=defaults.random_state, return_pred=False, return_alpha=False): - """ Perform prediction on the test set using the full training set. + """Perform prediction on the test set using the full training set. 
Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - read_kernel (bool): if 'X' is a kernel and not an array of representations - sigma (float): width of the kernel - eta (float): regularization strength for matrix inversion - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression - return_pred (bool) : return predictions - return_alpha (bool) : return regression weights + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sigma (float): Width of the kernel. + eta (float): Regularization strength for matrix inversion. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). 
+ sparse (int): The number of reference environments to consider for sparse regression. + return_pred (bool): Return predictions. + return_alpha (bool): Return regression weights. Returns: np.1darray(Ntest) : prediction absolute errors on the test set np.1darray(Ntest) : (if return_pred is True) predictions on the test set np.1darray(Ntrain or sparse) : (if return_alpha is True) regression weights + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ idx_train, idx_test, y_train, y_test = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) @@ -71,12 +76,13 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, def main(): + """Command-line entry point for computing final prediction errors.""" parser = RegressionParser(description='This program computes the full-training error for each molecule.', hyperparameters_set='single') parser.remove_argument('train_size') parser.add_argument('--save-alpha', type=str, dest='save_alpha', default=None, help='file to write the regression coefficients to') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index c94339ee..38bf9494 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -1,14 +1,29 @@ +"""Global (molecular) kernel implementations. + +Provides: + global_kernels_dict: Dictionary mapping global kernel names to functions. +""" + import math from collections import Counter import numpy as np from tqdm import tqdm +from qstack.tools import slice_generator def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): - """ - - .. todo:: - Write the docstring + """Compute global kernel matrix between two sets of molecular representations. 
+ + Args: + X (list): List of molecular representations (first set). + Y (list): List of molecular representations (second set). + sigma (float): Kernel width parameter. + local_kernel (callable): Local kernel function for atomic environments. + global_kernel (callable): Global kernel function for combining local kernels. + options (dict): Dictionary of global kernel options. + + Returns: + numpy ndarray: Global kernel matrix of shape (len(X), len(Y)). """ self = (Y is X) verbose = options.get('verbose', 0) @@ -54,32 +69,44 @@ def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None): - """ - - .. todo:: - Write the docstring + """Compute the covariance matrix between two molecules using local kernels. + + Args: + mol1 (dict): First molecule represented as dictionary of atomic environments by species. + mol2 (dict): Second molecule represented as dictionary of atomic environments by species. + species (numpy ndarray): Array of unique atomic species present in the dataset. + max_atoms (dict): Maximum number of atoms per species across all molecules. + max_size (int): Total size of the padded covariance matrix. + kernel (callable): Local kernel function. + sigma (float, optional): Kernel width parameter. Defaults to None. + + Returns: + numpy ndarray: Covariance matrix of shape (max_size, max_size). 
""" K_covar = np.zeros((max_size, max_size)) - idx = 0 - for q in species: + for q, slice_ in slice_generator(species, inc=lambda q: max_atoms[q]): n1 = len(mol1[q]) n2 = len(mol2[q]) q_size = max_atoms[q] if n1==0 or n2==0: - idx += q_size continue x1 = np.pad(mol1[q], ((0, q_size - n1),(0,0)), 'constant') x2 = np.pad(mol2[q], ((0, q_size - n2),(0,0)), 'constant') - K_covar[idx:idx+q_size, idx:idx+q_size] = kernel(x1, x2, sigma) - idx += q_size + K_covar[slice_, slice_] = kernel(x1, x2, sigma) return K_covar def normalize_kernel(kernel, self_x=None, self_y=None, verbose=0): - """ + """Normalize a kernel matrix using self-kernel values. + + Args: + kernel (numpy ndarray): Kernel matrix to normalize. + self_x (numpy ndarray, optional): Self-kernel values for X. If None, extracted from diagonal. Defaults to None. + self_y (numpy ndarray, optional): Self-kernel values for Y. If None, extracted from diagonal. Defaults to None. + verbose (int): Verbosity level. Defaults to 0. - .. todo:: - Write the docstring + Returns: + numpy ndarray: Normalized kernel matrix. """ if verbose: print("Normalizing kernel.") @@ -91,12 +118,15 @@ def normalize_kernel(kernel, self_x=None, self_y=None, verbose=0): def mol_to_dict(mol, species): - """ + """Convert molecular representation to a dictionary organized by atomic species. - .. todo:: - Write the docstring - """ + Args: + mol (numpy ndarray): Molecular representation where each row is [atomic_number, features...]. + species (numpy ndarray): Array of unique atomic species. + Returns: + dict: Dictionary mapping atomic numbers to arrays of atomic feature vectors. + """ mol_dict = {q:[] for q in species} for atom in mol: mol_dict[atom[0]].append(atom[1]) @@ -106,23 +136,41 @@ def mol_to_dict(mol, species): def sumsq(x): + """Compute sum of squares (dot product with itself). + + Args: + x (numpy ndarray): Input vector. + + Returns: + float: Sum of squared elements. 
+ """ return x@x def avg_kernel(kernel, _options): - """ + """Compute the average kernel value. - .. todo:: - Write the docstring + Args: + kernel (numpy ndarray): Kernel matrix. + _options (dict): Options dictionary (unused). + + Returns: + float: Average of all kernel matrix elements. """ return np.sum(kernel) / math.prod(kernel.shape) def rematch_kernel(kernel, options): - """ + """Compute the REMatch (Regularized Entropy Match) kernel. + + Uses Sinkhorn algorithm to compute optimal transport-based kernel similarity. + + Args: + kernel (numpy ndarray): Local kernel matrix. + options (dict): Options dictionary containing 'alpha' parameter for regularization. - .. todo:: - Write the docstring + Returns: + float: REMatch kernel value. """ alpha = options['alpha'] thresh = 1e-6 diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 61dc517f..9d86af91 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -1,3 +1,5 @@ +"""Hyperparameter optimization.""" + import sys import numpy as np import scipy @@ -9,33 +11,36 @@ def hyperparameters(X, y, - sigma=defaults.sigmaarr, eta=defaults.etaarr, gkernel=defaults.gkernel, gdict=defaults.gdict, - akernel=defaults.kernel, test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, + sigma=defaults.sigmaarr, eta=defaults.etaarr, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, + test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, printlevel=0, adaptive=False, read_kernel=False, sparse=None, random_state=defaults.random_state): - """ Performs a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). + """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). 
Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - sigma (list): list of kernel width for the grid search - eta (list): list of regularization strength for the grid search - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - akernel (str): local kernel (Laplacian, Gaussian, linear) - test_size (float or int): test set fraction (or number of samples) - splits (int): K number of splits for the Kfold cross-validation - idx_test (list): list of indices for the test-set (based on the sequence in X - idx_train (list): list of indices for the training set (based on the sequence in X) - printlevel (int): controls level of output printing - adaptative (bool): to expand the grid search adaptatively - read_kernel (bool): if 'X' is a kernel and not an array of representations - sparse (int): the number of reference environnments to consider for sparse regression - random_state (int): the seed used for random number generator (controls train/test splitting) + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + sigma (list): List of kernel width for the grid search. + eta (list): List of regularization strength for the grid search. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + splits (int): K number of splits for the Kfold cross-validation. + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). 
+ printlevel (int): Controls level of output printing. + adaptive (bool): To expand the grid search adaptively. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sparse (int): The number of reference environments to consider for sparse regression. + random_state (int): The seed used for random number generator (controls train/test splitting). Returns: The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], - where C is the number of parameter set and - the array is sorted according to MAEs (last is minimum) + where C is the number of parameter sets and + the array is sorted according to MAEs (last is minimum) + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ def k_fold_opt(K_all, eta): kfold = KFold(n_splits=splits, shuffle=False) @@ -120,9 +125,9 @@ def hyper_loop(sigma, eta): # at the 1st iteration if is checked twice on purpose if direction=='up' and best_sigma==max(work_sigma): - new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) + new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) elif direction=='down' and best_sigma==min(work_sigma): - new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) if new_sigma is None: break @@ -132,15 +137,16 @@ def hyper_loop(sigma, eta): def main(): + """Command-line entry point for hyperparameter optimization.""" parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') parser.remove_argument("random_state") parser.remove_argument("train_size") args = parser.parse_args() - if(args.readk): + if args.readk: args.sigma = [np.nan] - print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() + print(vars(args)) X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index 7be898dd..9f2685e5 100644 --- a/qstack/regression/kernel.py +++ 
b/qstack/regression/kernel.py @@ -1,3 +1,5 @@ +"""Kernel matrix computation.""" + import os import numpy as np from qstack.tools import correct_num_threads @@ -6,15 +8,15 @@ def kernel(X, Y=None, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict): - """ Computes a kernel between sets A and B (or A and A) using their representations. + """Compute a kernel between sets A and B (or A and A) using their representations. Args: - X (list of arrays): Representation of A - Y (list of arrays): Representation of B. - sigma (): Sigma hyperparameter. - akernel (): Kernel type (G for Gaussian, L for Laplacian, and myL for Laplacian for open-shell systems). - gkernel (): Global kernel type (agv for average, rem for REMatch kernel, None for local kernels). - gdict (): Dictionary like input string to initialize global kernel parameters. Defaults to {'alpha':1.0, 'normalize':1}. + X (numpy.ndarray): Representation of A. + Y (numpy.ndarray): Representation of B. + sigma (float): Width of the kernel. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. Returns: A numpy ndarray containing the kernel. 
@@ -27,6 +29,7 @@ def kernel(X, Y=None, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=def def main(): + """Command-line entry point for computing kernel matrices.""" parser = RegressionParser(description='This program computes kernel.', hyperparameters_set='single') parser.remove_argument('prop') parser.remove_argument('test_size') @@ -38,7 +41,7 @@ def main(): parser.add_argument('--dir', type=str, dest='dir', default='./', help='directory to save the output in') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() if os.path.isfile(args.repr): X = np.load(args.repr) diff --git a/qstack/regression/kernel_utils.py b/qstack/regression/kernel_utils.py index 2e24817c..1fbaf601 100644 --- a/qstack/regression/kernel_utils.py +++ b/qstack/regression/kernel_utils.py @@ -1,56 +1,56 @@ +"""Kernel computation utility functions and defaults. + +Provides: + REGMODULE_PATH: Path to the module. + defaults: Default parameters. +""" + import os -import argparse import warnings from types import SimpleNamespace import numpy as np +from sklearn.model_selection import train_test_split from .local_kernels import local_kernels_dict from .global_kernels import global_kernels_dict, get_global_K -REGMODULE_PATH = os.path.dirname(__file__) - -class ParseKwargs(argparse.Action): - def __call__(self, _parser, namespace, values, _option_string=None): - setattr(namespace, self.dest, defaults.gdict) - for value in values: - key, value = value.split('=') - for t in [int, float]: - try: - value = t(value) - break - except ValueError: - continue - getattr(namespace, self.dest)[key] = value +REGMODULE_PATH = os.path.dirname(__file__) defaults = SimpleNamespace( - sigma=32.0, - eta=1e-5, - kernel='L', - gkernel=None, - gdict={'alpha':1.0, 'normalize':1, 'verbose':0}, - test_size=0.2, - n_rep=5, - splits=5, - train_size=[0.125, 0.25, 0.5, 0.75, 1.0], - etaarr=np.logspace(-10, 0, 5).tolist(), - sigmaarr=np.logspace(0,6, 13).tolist(), - 
sigmaarr_mult=np.logspace(0,2, 5).tolist(), - random_state=0, - ) + sigma=32.0, + eta=1e-5, + kernel='L', + gkernel=None, + gdict={'alpha':1.0, 'normalize':1, 'verbose':0}, + test_size=0.2, + n_rep=5, + splits=5, + train_size=[0.125, 0.25, 0.5, 0.75, 1.0], + etaarr=np.logspace(-10, 0, 5).tolist(), + sigmaarr=np.logspace(0,6, 13).tolist(), + sigmaarr_mult=np.logspace(0,2, 5).tolist(), + random_state=0, + ) def get_local_kernel(arg): - """ Obtains a local-envronment kernel by name. + """Obtain a local-environment kernel function by name. Args: - arg (str): the name of the kernel, in [''] # TODO + arg (str): Kernel name. Available options include: + - 'G': Gaussian (RBF) kernel. + - 'L': Laplacian kernel. + - 'dot': Linear (dot product) kernel. + - 'cosine': Cosine similarity kernel. + - Implementation-specific variants: 'G_sklearn', 'G_custom_c', 'L_sklearn', 'L_custom_c', 'L_custom_py'. Returns: - kernel (Callable[np.ndarray,np.ndarray,float -> np.ndarray]): the actual kernel function, to call as ``K = kernel(X,Y,gamma)`` + callable: Kernel function with signature kernel(X, Y, gamma) -> numpy.ndarray. - .. todo:: - Write the docstring + Raises: + NotImplementedError: If the specified kernel is not implemented. + RuntimeError: If the kernel implementation is not available (e.g., C library missing). """ if arg not in local_kernels_dict: raise NotImplementedError(f'{arg} kernel is not implemented') @@ -62,10 +62,17 @@ def get_local_kernel(arg): def get_global_kernel(arg, local_kernel): - """ + """Create a global kernel function from a local kernel. + + Args: + arg (tuple): Tuple of (gkernel_name, options_dict). + local_kernel (callable): Local kernel function. - .. todo:: - Write the docstring + Returns: + callable: Global kernel function that combines local kernels. + + Raises: + NotImplementedError: If the specified global kernel is not implemented. 
""" gkernel, options = arg @@ -76,23 +83,26 @@ def get_global_kernel(arg, local_kernel): def get_kernel(arg, arg2=None): - """ Returns the kernel function depending on the cli argument + """Return the appropriate kernel function based on arguments. - .. todo:: - Write the docstring - """ + Args: + arg (str): Local kernel name. + arg2 (tuple, optional): If provided, tuple of (global_kernel_name, options) for global kernel. Defaults to None. - local_kernel = get_local_kernel(arg) + Returns: + callable: Kernel function (local or global). + """ + local_kernel = get_local_kernel(arg) - if arg2 is None or arg2[0] is None: - return local_kernel - else: - return get_global_kernel(arg2, local_kernel) + if arg2 is None or arg2[0] is None: + return local_kernel + else: + return get_global_kernel(arg2, local_kernel) def train_test_split_idx(y, idx_test=None, idx_train=None, test_size=defaults.test_size, random_state=defaults.random_state): - """ Perfrom test/train data split based on random shuffling or given indices. + """Perfrom test/train data split based on random shuffling or given indices. If neither `idx_test` nor `idx_train` are specified, the splitting is done randomly using `random_state`. @@ -104,20 +114,20 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, Args: y (numpy.1darray(Nsamples)): array containing the target property of all Nsamples - test_size (float or int): test set fraction (or number of samples) - idx_test ([int] / numpy.1darray): list of indices for the test set (based on the sequence in X) - idx_train ([int] / numpy.1darray): list of indices for the training set (based on the sequence in X) - random_state (int): the seed used for random number generator (controls train/test splitting) + test_size (float or int): Test set fraction (or number of samples). + idx_test ([int] / numpy.1darray): List of indices for the test set (based on the sequence in X). 
+ idx_train ([int] / numpy.1darray): List of indices for the training set (based on the sequence in X). + random_state (int): The seed used for random number generator (controls train/test splitting). Returns: numpy.1darray(Ntest, dtype=int) : test indices numpy.1darray(Ntrain, dtype=int) : train indices numpy.1darray(Ntest, dtype=float) : test set target property numpy.1darray(Ntrain, dtype=float) : train set target property - """ - - from sklearn.model_selection import train_test_split + Raises: + RuntimeError: If test indices are repeated. + """ if idx_test is None and idx_train is None: idx_train, idx_test = train_test_split(np.arange(len(y)), test_size=test_size, random_state=random_state) elif idx_test is not None and idx_train is None: @@ -142,7 +152,7 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, def sparse_regression_kernel(K_train, y_train, sparse_idx, eta): - r""" Compute the sparse regression matrix and vector. + r"""Compute the sparse regression matrix and vector. Solution of a sparse regression problem is $$ \vec w = \left( \mathbf{K}_{MN} \mathbf{K}_{NM} + \eta \mathbf{1} \right) ^{-1} \mathbf{K}_{MN}\vec y $$ @@ -156,12 +166,10 @@ def sparse_regression_kernel(K_train, y_train, sparse_idx, eta): and y_solve $\mathbf{K}_{MN}\vec y$. Args: - K_train (numpy.1darray(Ntrain1,Ntrain): kernel computed on the training set. - Ntrain1 (N in the equation) may differ from the full training set Ntrain (e.g. a subset) + K_train (numpy.1darray(Ntrain1,Ntrain): Kernel computed on the training set. Ntrain1 (N in the equation) may differ from the full training set Ntrain (e.g. a subset). y_train (numpy.1darray(Ntrain)): array containing the target property of the full training set - sparse_idx (numpy.1darray of int) : (M in the equation): sparse subset indices - wrt to the order of the full training set. 
- eta (float): regularization strength for matrix inversion + sparse_idx (numpy.1darray of int): (M in the equation): sparse subset indices wrt to the order of the full training set. + eta (float): Regularization strength for matrix inversion. Returns: numpy.2darray((len(sparse), len(sparse)), dtype=float) : matrix to be inverted diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index b2d177c2..69aa7130 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -1,4 +1,12 @@ +"""Local (atomic) kernel implementations. + +Provides: + local_kernels_dict: Dictionary mapping kernel names to their implementations. +""" + import os +import ctypes +import sysconfig import warnings import numpy as np import sklearn.metrics.pairwise as _SKLEARN_PAIRWISE @@ -6,34 +14,47 @@ def custom_laplacian_kernel(X, Y, gamma): - """ Compute Laplacian kernel between X and Y - - .. todo:: - Write the docstring - """ - if X.shape[1:] != Y.shape[1:]: - raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") - def cdist(X, Y): - K = np.zeros((len(X),len(Y))) - for i,x in enumerate(X): - x = np.array([x] * len(Y)) - d = np.abs(x-Y) - d = np.sum(d, axis=tuple(range(1, len(d.shape)))) - K[i,:] = d - return K - K = -gamma * cdist(X, Y) - np.exp(K, out=K) - return K + """Compute Laplacian kernel between X and Y using Python implementation. + K(x, y) = exp(-gamma * ||x - y||_1) -def custom_C_kernels(kernel_function, return_distance_function=False): + Args: + X (numpy ndarray): First set of samples (can be multi-dimensional). + Y (numpy ndarray): Second set of samples. + gamma (float): Kernel width parameter. + + Returns: + numpy ndarray: Laplacian kernel matrix of shape (len(X), len(Y)). + + Raises: + RuntimeError: If X and Y have incompatible shapes. 
""" + if X.shape[1:] != Y.shape[1:]: + raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") + + def cdist(X, Y): + K = np.zeros((len(X),len(Y))) + for i,x in enumerate(X): + x = np.array([x] * len(Y)) + d = np.abs(x-Y) + d = np.sum(d, axis=tuple(range(1, len(d.shape)))) + K[i,:] = d + return K + K = -gamma * cdist(X, Y) + np.exp(K, out=K) + return K + + +def custom_C_kernels(kernel_function, return_distance_function=False): + """Create kernel function wrappers using C implementation for speed. - .. todo:: - Write the docstring + Args: + kernel_function (str): Kernel type ('L' for Laplacian, 'G' for Gaussian). + return_distance_function (bool): If True, returns distance function instead of kernel. Defaults to False. + + Returns: + callable or None: Kernel or distance function, or None if C library cannot be loaded. """ - import ctypes - import sysconfig array_2d_double = np.ctypeslib.ndpointer(dtype=np.float64, ndim=2, flags='CONTIGUOUS') lib_path = REGMODULE_PATH[0]+"/lib/manh"+sysconfig.get_config_var('EXT_SUFFIX') @@ -79,21 +100,55 @@ def kernel_func_c(X, Y, gamma): def dot_kernel_wrapper(x, y, *_kargs, **_kwargs): + """Compute linear (dot product) kernel. + + Args: + x (numpy ndarray): First set of samples. + y (numpy ndarray): Second set of samples. + *_kargs: Unused positional arguments (for compatibility). + **_kwargs: Unused keyword arguments (for compatibility). + + Returns: + numpy ndarray: Linear kernel matrix. + """ return _SKLEARN_PAIRWISE.linear_kernel(x, y) def cosine_similarity_wrapper(x, y, *_kargs, **_kwargs): + """Compute cosine similarity kernel. + + Args: + x (numpy ndarray): First set of samples. + y (numpy ndarray): Second set of samples. + *_kargs: Unused positional arguments (for compatibility). + **_kwargs: Unused keyword arguments (for compatibility). + + Returns: + numpy ndarray: Cosine similarity matrix. 
+ """ return _SKLEARN_PAIRWISE.cosine_similarity(x, y) def local_laplacian_kernel_wrapper(X, Y, gamma): - """ Wrapper that acts as a generic laplacian kernel function - It simply decides which kernel implementation to call. + """Decide which kernel implementation to call. + + Wrapper that acts as a generic Laplacian kernel function. + + Args: + X (numpy ndarray): First set of samples (can be multi-dimensional). + Y (numpy ndarray): Second set of samples. + gamma (float): Kernel width parameter. + + Returns: + numpy ndarray: Laplacian kernel matrix of shape (len(X), len(Y)). + + Raises: + RuntimeError: If X and Y have incompatible shapes. """ X, Y = np.asarray(X), np.asarray(Y) if X.shape[1:] != Y.shape[1:]: raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") - if X.ndim==1: # do not extend so the behavior is the same for 'L' and 'L_custom_py' + if X.ndim==1: # do not extend so the behavior is the same for 'L' and 'L_custom_py' raise RuntimeError("Dimensionality of X should be > 1") if X.ndim>2: diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 7083b414..eca53cbf 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -1,3 +1,5 @@ +"""Out-of-sample prediction.""" + import sys import numpy as np from qstack.mathutils.fps import do_fps @@ -10,26 +12,25 @@ def oos(X, X_oos, alpha, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, idx_test=None, idx_train=None, sparse=None, random_state=defaults.random_state): - """ Perform prediction on an out-of-sample (OOS) set. + """Perform prediction on an out-of-sample (OOS) set. Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - X_oos (numpy.2darray[Noos,Nfeat]): array of OOS representations. - alpha (numpy.1darray(Ntrain or sparse)): regression weights. 
- sigma (float): width of the kernel - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + X_oos (numpy.ndarray[Noos,...]): Array of OOS representations. + alpha (numpy.1darray(Ntrain or sparse)): Regression weights. + sigma (float): Width of the kernel. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (list): List of indices for the test set (based on the sequence in X). + idx_train (list): List of indices for the training set (based on the sequence in X). + sparse (int): The number of reference environments to consider for sparse regression. 
class ParseKwargs(argparse.Action):
    """Collect ``key=value`` command-line tokens into the global kernel parameters dict.

    Intended to be used as ``action=ParseKwargs`` together with ``nargs='*'``,
    e.g. ``--gdict alpha=2 normalize=0``.
    """

    def __call__(self, _parser, namespace, values, _option_string=None):
        """Store the parsed parameter dictionary on the namespace.

        The dictionary starts from the entries of ``defaults.gdict`` and is then
        updated with the ``key=value`` pairs given on the command line.
        Each value is converted to ``int`` if possible, otherwise to ``float``,
        otherwise it is kept as a string.

        Args:
            _parser: Unused (for interface compatibility).
            namespace (argparse.Namespace): Namespace to set attributes to.
            values (list[str]): The associated command-line arguments.
            _option_string: Unused (for interface compatibility).

        Raises:
            argparse.ArgumentError: If an item does not contain ``=``.
        """
        # Work on a copy: writing into `defaults.gdict` itself would leak the
        # command-line overrides into every later user of the defaults.
        gdict = dict(defaults.gdict)
        for item in values:
            # Split on the first '=' only, so that values may contain '='.
            key, separator, raw_value = item.partition('=')
            if not separator:
                raise argparse.ArgumentError(self, f"expected KEY=VALUE, got '{item}'")
            gdict[key] = self._convert(raw_value)
        setattr(namespace, self.dest, gdict)

    @staticmethod
    def _convert(text):
        """Convert a string to int, else to float, else return it unchanged."""
        for cast in (int, float):
            try:
                return cast(text)
            except ValueError:
                continue
        return text
+ - gkernel (--gkernel): Global/molecular kernel type (average, REMatch) + - gdict (--gdict): Global kernel parameters dictionary + - test (--test): Test set fraction (0.0-1.0) + - train (--train): Training set fraction list for learning curvers + (0.0-1.0 where 1.0 means full training set minus test set) + - ll (--ll): Thread correction flag for running on clusters + - readkernel (--readkernel): Flag if input is pre-computed kernel + - sparse (--sparse): Sparse regression basis size + - random_state (--random_state): Random seed for reproducibility + + Additional for 'single' mode: + - eta (--eta): Single regularization parameter + - sigma (--sigma): Single kernel width parameter + + Additional for 'array' mode: + - eta (--eta): Array of regularization parameters + - sigma (--sigma): Array of kernel width parameters + - splits (--splits): Number of k-fold cross-validation splits + - print (--print): Verbosity level + - ada (--ada): Adaptive sigma flag + - name (--name): Output filename + """ def __init__(self, hyperparameters_set=None, **kwargs): super().__init__( formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -22,7 +88,7 @@ def __init__(self, hyperparameters_set=None, **kwargs): "L_custompy" is suited to open-shell systems') parser.add_argument('--gkernel', type=str, dest='gkernel', default=defaults.gkernel, choices=global_kernels_dict.keys(), help='global kernel type: "avg" for average, "rem" for REMatch') - parser.add_argument('--gdict', action=ParseKwargs, dest='gdict', default=defaults.gdict, nargs='*', help='dictionary like input string to initialize global kernel parameters') + parser.add_argument('--gdict', action=ParseKwargs, dest='gdict', default=defaults.gdict, nargs='*', help='dictionary like input string to initialize global kernel parameters, e.g. 
"--gdict alpha=2 normalize=0"') parser.add_argument('--test', type=float, dest='test_size', default=defaults.test_size, help='test set fraction') parser.add_argument('--train', type=float, dest='train_size', default=defaults.train_size, nargs='+', help='training set fractions') parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads') @@ -34,18 +100,3 @@ def __init__(self, hyperparameters_set=None, **kwargs): parser.add_argument('--print', type=int, dest='printlevel', default=0, help='printlevel') parser.add_argument('--ada', action='store_true', dest='adaptive', default=False, help='if adapt sigma') parser.add_argument('--name', type=str, dest='nameout', default=None, help='the name of the output file') - - - def remove_argument(parser, arg): - for action in parser._actions: - opts = action.option_strings - if (opts and opts[0] == arg) or action.dest == arg: - parser._remove_action(action) - break - - for action in parser._action_groups: - for group_action in action._group_actions: - opts = group_action.option_strings - if (opts and opts[0] == arg) or group_action.dest == arg: - action._group_actions.remove(group_action) - return diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index 24b83f59..572c3cd1 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -1,3 +1,5 @@ +"""Learning curve computation.""" + import numpy as np import scipy from qstack.mathutils.fps import do_fps @@ -11,32 +13,34 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, test_size=defaults.test_size, train_size=defaults.train_size, n_rep=defaults.n_rep, random_state=defaults.random_state, idx_test=None, idx_train=None, sparse=None, debug=False, save_pred=False): - """ Produces learning curves (LC) data, for various training sizes, using kernel ridge regression and the user specified parameters + """Produce learning curves (LC) data using 
kernel ridge regression. Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - read_kernel (bool): if 'X' is a kernel and not an array of representations - sigma (float): width of the kernel - eta (float): regularization strength for matrix inversion - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - train_size (list): list of training set size fractions used to evaluate the points on the LC - n_rep (int): the number of repetition for each point (using random sampling) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression - debug (bool): to use a fixed seed for random sampling (for reproducibility) - save_pred (bool): to return all predicted targets + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sigma (float): Width of the kernel. + eta (float): Regularization strength for matrix inversion. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + train_size (list): List of training set size fractions used to evaluate the points on the LC. 
+ n_rep (int): The number of repetition for each point (using random sampling). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + sparse (int): The number of reference environnments to consider for sparse regression. + debug (bool): To use a fixed seed for partial training set selection (for reproducibility). + save_pred (bool): To return all predicted targets. Returns: The computed LC, as a list containing all its points (train size, MAE, std) If save_pres is True, a tuple with (results, (target values, predicted values)) - """ + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. + """ idx_train, idx_test, y_train, y_test = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) if read_kernel is False: @@ -54,7 +58,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, else: if read_kernel: raise RuntimeError('Cannot do FPS with kernels') - sparse_idx = do_fps(X_train)[0][:sparse] # indices within the training set + sparse_idx = do_fps(X_train)[0][:sparse] # indices within the training set if debug: # Ensures reproducibility of the sample selection for each train_size over repetitions (n_rep) @@ -67,7 +71,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, size_train = int(np.floor(len(y_train)*size)) if size <= 1.0 else size maes = [] for _rep in range(n_rep): - train_idx = rng.choice(all_indices_train, size = size_train, replace=False) + train_idx = rng.choice(all_indices_train, size=size_train, replace=False) y_kf_train = y_train[train_idx] if not sparse: @@ -86,13 +90,14 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, def main(): + """Command-line 
entry point for computing learning curves.""" parser = RegressionParser(description='This program computes the learning curve.', hyperparameters_set='single') parser.add_argument('--splits', type=int, dest='splits', default=defaults.n_rep, help='number of splits') parser.add_argument('--name', type=str, dest='nameout', default=None, help='the name of the output file containting the LC data (.txt)') parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='enable debug') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/reorder.py b/qstack/reorder.py new file mode 100644 index 00000000..5c50ba75 --- /dev/null +++ b/qstack/reorder.py @@ -0,0 +1,133 @@ +"""Functions for reordering atomic orbitals between different conventions. + +Provides: + pyscf2gpr_l1_order: indices to reorder l=1 orbitals from PySCF to GPR. +""" + +import numpy as np +from .tools import slice_generator + + +pyscf2gpr_l1_order = [1,2,0] + + +def get_mrange(l): + """Get the m quantum number range for a given angular momentum l. + + For l=1, returns pyscf order: x,y,z which is (1,-1,0). + For other l, returns the standard range from -l to +l. + + Args: + l (int): Angular momentum quantum number. + + Returns: + tuple or range: Magnetic quantum numbers for the given l. + """ + if l==1: + return (1,-1,0) + else: + return range(-l,l+1) + + +def _orca2gpr_idx(l_slices, m): + """Given a molecule returns a list of reordered indices to tranform Orca AO ordering into SA-GPR. + + In Orca, orbital ordering corresponds to: + m=0, +1, +2, ..., l, -1, -2, ..., -l + while in SA-GPR it is: + m=-l, -l+1, ..., -1, 0, +1, ..., l-1, l + Additionally, Orca uses a different sign convention for |m|>=3. + + Args: + l_slices (iterator): Iterator that yeilds (l: int, s: slice) per shell, where + l is angular momentum quantum number and s is the corresponding slice of size 2*l+1. 
+ m (np.ndarray): Array of magnetic quantum numbers per AO. + + Returns: + tuple: Re-arranged indices array and sign array. + """ + idx = np.arange(len(m)) + for _l, s in l_slices: + idx[s] = np.concatenate((idx[s][::-2], idx[s][1::2])) + signs = np.ones_like(idx) + signs[np.where(np.abs(m)>=3)] = -1 # in pyscf order + signs[idx] = signs # in orca order + return idx, signs + + +def _pyscf2gpr_idx(l_slices, m): + """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. + + In SA-GPR, orbital ordering corresponds to: + m=-l, -l+1, ..., -1, 0, +1, ..., l-1, l + In PySCF, it is the same except for p-orbitals which are ordered as: + m=+1, -1, 0 (i.e., x,y,z). + Signs are the same in both conventions, so they are returned for compatibility. + + Args: + l_slices (iterator): Iterator that yeilds (l: int, s: slice) per shell, where + l is angular momentum quantum number and s is the corresponding slice of size 2*l+1. + m (np.ndarray): Array of magnetic quantum numbers per AO. + + Returns: + tuple: Re-arranged indices array and sign array. + """ + idx = np.arange(len(m)) + for l, s in l_slices: + if l==1: + idx[s] = idx[s][pyscf2gpr_l1_order] + return idx, np.ones_like(idx) + + +def reorder_ao(mol, vector, src='pyscf', dest='gpr'): + """Reorder the atomic orbitals from one convention to another. + + For example, src=pyscf dest=gpr reorders p-orbitals from +1,-1,0 (pyscf convention) + to -1,0,+1 (SA-GPR convention). + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + vector (numpy.ndarray): Vector (nao,) or matrix (mol.nao,mol.nao) to reorder. + src (str): Current convention. Defaults to 'pyscf'. + dest (str): Convention to convert to (available: 'pyscf', 'gpr', 'orca'). Defaults to 'gpr'. + + Returns: + numpy.ndarray: Reordered vector or matrix. + + Raises: + NotImplementedError: If the specified convention is not implemented. + ValueError: If vector dimension is not 1 or 2. 
+ """ + def get_idx(L, m, convention): + convention = convention.lower() + l_slices = slice_generator(L, inc=lambda l: 2*l+1) + if convention == 'gpr': + return np.arange(len(m)), np.ones_like(m) + elif convention == 'pyscf': + return _pyscf2gpr_idx(l_slices, m) + elif convention == 'orca': + return _orca2gpr_idx(l_slices, m) + else: + errstr = f'Conversion to/from the {convention} convention is not implemented' + raise NotImplementedError(errstr) + + from .compound import basis_flatten + + (_, _, m), L = basis_flatten(mol, return_both=False, return_shells=True) + idx_src, sign_src = get_idx(L, m, src) + idx_dest, sign_dest = get_idx(L, m, dest) + + if vector.ndim == 2: + sign_src = np.einsum('i,j->ij', sign_src, sign_src) + sign_dest = np.einsum('i,j->ij', sign_dest, sign_dest) + idx_dest = np.ix_(idx_dest,idx_dest) + idx_src = np.ix_(idx_src,idx_src) + elif vector.ndim!=1: + errstr = f'Dim = {vector.ndim} (should be 1 or 2)' + raise ValueError(errstr) + + newvector = np.zeros_like(vector) + newvector[idx_dest] = (sign_src*vector)[idx_src] + newvector *= sign_dest + + return newvector diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index cf50f083..3227d562 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -1,390 +1,557 @@ +"""Laikov-Briling 2020 guess Hamiltonian implementation.""" + import copy import numpy as np from pyscf import data, df, scf -""" Taken from https://github.com/briling/aepm and modified """ class LB2020guess: + """Laikov-Briling 2020 guess Hamiltonian implementation. + + Reference: + D. N. Laikov, K. R. Briling, + "Atomic effective potentials for starting molecular electronic structure calculations", + Theor. Chem. Acc. 139, 17 (2020), doi:10.1007/s00214-019-2521-3. + + Implements the atomic effective potential method for initial guess generation. + See https://github.com/briling/aepm for a C implementation. 
+ """ + def __init__(self, fname=None, parameters='HF'): + self.acfile_default = './parameters_HF.dat' + self.Qmax = 102 + self.init_data() + self.get_basis(fname, parameters) + + def renormalize(self, a): + r"""Compute renormalization factor for Gaussian basis functions. + + The auxiliary basis functions are given in charge normalization, thus + we need to renormalize them to square-integral normalization for use in integrals. + + 1/norm1 = \int \exp(-a*r^2) d^3 r => norm1 = (a/pi)^(3/2) + 1/norm2^2 = \int (\exp(-a*r^2))^2 d^3 r => norm2 = (2.0*a/pi)^(3/4) + coefficient = norm1 / norm2 = (0.5*a/pi)^(3/4) + + Args: + a (float): Gaussian exponent. + + Returns: + float: Renormalization factor (0.5*a/pi)^(3/4). + """ + x = np.sqrt(np.sqrt(0.5*a/np.pi)) + return x*x*x + + def read_ac(self, fname): + """Read auxiliary basis parameters from file. + + Args: + fname (str, optional): Path to parameter file. If None, uses default. + + Returns: + dict: Dictionary mapping element symbols to lists of basis function + parameters [[l, [exponent, coefficient]], ...]. + """ + if fname is None: + fname = self.acfile_default + with open(fname) as f: + lines = f.readlines() + basis = {'H': []} + il=0 + while il 0: + zrest = zcore + bad_idx = [] + for iprim in range(len(acbasis[q])): + if np.isclose(zrest, 0): + break + a, c = acbasis[q][iprim][1] + renorm = self.renormalize(a) + c /= renorm # convert back to charge units: sum {c} == charge(q) + dc = min(c, zrest) + if np.isclose(c, dc): + bad_idx.append(iprim) + else: + acbasis[q][iprim][1][1] = (c-dc)*renorm + zrest -= dc + for i in bad_idx[::-1]: + acbasis[q].pop(i) + return acbasis + + def get_auxweights(self, auxmol): + """Extract auxiliary basis weights from the basis. + + Collects the coefficients from each auxiliary basis primitive + into a single array aligned with auxiliary orbital indices. + + Args: + auxmol (pyscf.gto.Mole): Molecule object with auxiliary basis. 
def merge_caps(self, w, eri3c):
    """Contract the 3-center integrals with the auxiliary basis weights.

    PySCF internally renormalizes basis functions, thus ignores the charge normalization
    of the auxiliary basis, and the weights must be applied explicitly here.

    Args:
        w (numpy.ndarray): Auxiliary basis weights.
        eri3c (numpy.ndarray): Integrals whose last axis runs over the auxiliary functions, e.g. (ij|P).

    Returns:
        numpy.ndarray: Contracted integrals (ij) = sum_P w_P * (ij|P);
            any leading axes of `eri3c` are kept.
    """
    contracted = np.einsum('...p,p->...', eri3c, w)
    return contracted


def get_eri3c(self, mol, auxmol):
    """Compute the 3-center electron repulsion integrals (ij|P).

    Args:
        mol (pyscf.gto.Mole): Main molecule object.
        auxmol (pyscf.gto.Mole): Auxiliary molecule object.

    Returns:
        numpy.ndarray: 3-center ERIs (ij|P) where i,j are primary AO indices
            and P is the auxiliary basis index.
    """
    combined = mol + auxmol
    n_main = mol.nbas
    # Both bra indices span the shells of `mol`; the ket index spans the shells
    # of `auxmol`, which follow those of `mol` in the combined molecule.
    shell_ranges = (0, n_main, 0, n_main, n_main, n_main+auxmol.nbas)
    return combined.intor('int3c2e_sph', shls_slice=shell_ranges)


def check_coefficients(self, mol, acbasis):
    """Check that the auxiliary basis coefficients sum to the expected total charge.

    The coefficients, converted back to charge units, are summed over all atoms
    and compared to the sum of the atomic charges
    (minus the molecular charge when the 'HF' parameter set is used).

    Args:
        mol (pyscf.gto.Mole): Molecule object.
        acbasis (dict): Auxiliary basis set dictionary.

    Raises:
        RuntimeError: If the coefficient sum does not match the expected charge.
    """
    fitted = 0
    for iat in range(mol.natm):
        primitives = acbasis[mol.atom_pure_symbol(iat)]
        fitted += sum(c/self.renormalize(a) for _, (a, c) in primitives)

    expected = sum(mol.atom_charges())
    if self.parameters == 'HF':
        expected -= mol.charge

    if np.isclose(fitted, expected):
        return
    raise RuntimeError("Coefficients corrupted after adding ECP")


def HLB20(self, mol):
    """Compute the LB2020 effective potential matrix.

    Args:
        mol (pyscf.gto.Mole): Molecule object.

    Returns:
        numpy.ndarray: LB2020 potential matrix in AO basis (nao x nao).
    """
    acbasis = self.use_ecp(mol, self.use_charge(mol))
    self.check_coefficients(mol, acbasis)
    auxmol = df.make_auxmol(mol, acbasis)
    integrals = self.get_eri3c(mol, auxmol)
    weights = self.get_auxweights(auxmol)
    return self.merge_caps(weights, integrals)


def Heff(self, mol):
    """Construct the one-electron Hamiltonian for the initial guess.

    Adds the LB2020 effective potential to the standard core Hamiltonian.
    The molecule, the core Hamiltonian, and the result are also stored
    as `self.mol`, `self.Hcore`, and `self.H`.

    Args:
        mol (pyscf.gto.Mole): Molecule object.

    Returns:
        numpy.ndarray: Effective Hamiltonian matrix in AO basis (nao x nao).
    """
    self.mol = mol
    hcore = scf.hf.get_hcore(mol)
    self.Hcore = hcore
    effective = hcore + self.HLB20(mol)
    self.H = effective
    return effective


def HLB20_ints_generator(self, mol, auxmol):
    """Create a generator of the derivative integrals for the LB2020 potential gradient.

    Computes the derivative integrals once and returns a function that assembles,
    for a given atom, the derivative of the 3-center integrals
    with respect to the position of that atom.

    Args:
        mol (pyscf.gto.Mole): Molecule object.
        auxmol (pyscf.gto.Mole): Auxiliary molecule object.

    Returns:
        callable: Function that takes an atom index and returns the gradient integrals
            as numpy.ndarray of shape (3, nao, nao, naux).
    """
    combined = mol + auxmol
    n_main = mol.nbas
    shell_ranges = (0, n_main, 0, n_main, n_main, n_main+auxmol.nbas)
    ints_ip1 = combined.intor('int3c2e_ip1', shls_slice=shell_ranges)  # derivative on the first AO index
    ints_ip2 = combined.intor('int3c2e_ip2', shls_slice=shell_ranges)  # derivative on the auxiliary index
    ao_ranges = mol.aoslice_by_atom()[:,2:]
    aux_ranges = auxmol.aoslice_by_atom()[:,2:]

    def HLB20_ints_deriv(iat):
        p0, p1 = ao_ranges[iat]
        P0, P1 = aux_ranges[iat]
        own_bra = ints_ip1[:,p0:p1,:,:]
        deriv = np.zeros_like(ints_ip1)
        deriv[:,p0:p1,:,:] += own_bra
        # The second AO index uses the same integrals with the two AO axes exchanged.
        deriv[:,:,p0:p1,:] += np.swapaxes(own_bra, 1, 2)
        deriv[:,:,:,P0:P1] += ints_ip2[:,:,:,P0:P1]
        return -deriv

    return HLB20_ints_deriv


def HLB20_generator(self, mol):
    """Create a generator of the LB2020 potential gradient contributions.

    Args:
        mol (pyscf.gto.Mole): Molecule object.

    Returns:
        callable: Function that takes an atom index and returns the LB2020 potential
            gradient as numpy.ndarray of shape (3, nao, nao).
    """
    acbasis = self.use_ecp(mol, self.use_charge(mol))
    self.check_coefficients(mol, acbasis)
    auxmol = df.make_auxmol(mol, acbasis)
    ints_deriv = self.HLB20_ints_generator(mol, auxmol)
    weights = self.get_auxweights(auxmol)

    def HLB20_deriv(iat):
        return self.merge_caps(weights, ints_deriv(iat))

    return HLB20_deriv
+ """ + self._caps_array = np.zeros(self.Qmax+1) + self._caps_array [ 1 : 2 +1] = 1.0 / 3.0 + self._caps_array [ 3 : 4 +1] = 1.0 / 16.0 + self._caps_array [ 5 : 10 +1] = 1.0 / 3.0 + self._caps_array [ 11 : 12 +1] = 1.0 / 32.0 + self._caps_array [ 13 : 18 +1] = 1.0 / 8.0 + self._caps_array [ 19 : 20 +1] = 1.0 / 32.0 + self._caps_array [ 21 : 30 +1] = 1.0 / 6.0 + self._caps_array [ 31 : 36 +1] = 1.0 / 12.0 + self._caps_array [ 37 : 38 +1] = 1.0 / 32.0 + self._caps_array [ 39 : 48 +1] = 1.0 / 8.0 + self._caps_array [ 49 : 54 +1] = 1.0 / 12.0 + self._caps_array [ 55 : 70 +1] = 1.0 / 32.0 + self._caps_array [ 71 : 86 +1] = 1.0 / 12.0 + self._caps_array [ 87 : 102 +1] = 1.0 / 32.0 + + self._hfs_basis = {'H': [[0, [0.0815877135278, 0.03846658840144482]]], + 'He': [[0, [0.808048051263, 0.42950970838920094]]], + 'Li': [[0, [2.60255347642, 0.9236581585938292]], [0, [0.0280604557276, 0.02092188631196157]]], + 'Be': [[0, [4.59692793038, 1.5671644720955082]], [0, [0.0804833286681, 0.07687177344753668]]], + 'B': [[0, [6.83323506001, 2.343454972959998]], [0, [0.128413097632, 0.15132206888434654]]], + 'C': [[0, [9.78271998209, 3.2338673789342076]], [0, [0.220436310973, 0.29830455285829904]]], + 'N': [[0, [13.0102305297, 4.234735126785875]], [0, [0.338162891505, 0.5080513541327736]]], + 'O': [[0, [16.0693906158, 5.282243530372744]], [0, [0.486361793604, 0.7907181038567846]]], + 'F': [[0, [19.101114431, 6.414979114451199]], [0, [0.654163546258, 1.1388759924473113]]], + 'Ne': [[0, [21.8775289055, 7.6030507281160205]], [0, [0.840940013903, 1.552754282665946]]], + 'Na': [[0, [39.3300572224, 8.771875563897146]], [0, [1.92102415925, 2.714794977091659]], [0, [0.07805961683, 0.0811487825176091]]], + 'Mg': [[0, [44.5119316877, 10.28939716988604]], [0, [2.2032684956, 3.187374601857695]], [0, [0.0887611981764, 0.1080039414860704]]], + 'Al': [[0, [48.8730920117, 11.90986953428574]], [0, [2.40497322587, 3.637221913420624]], [0, [0.0734291195179, 0.10551672569775308]]], + 'Si': [[0, 
[57.6233652793, 13.49494028809413]], [0, [2.94976481323, 4.320831140260877]], [0, [0.105177215317, 0.17781870789022175]]], + 'P': [[0, [66.8662881023, 15.16878399053013]], [0, [3.53343161485, 5.031541131577296]], [0, [0.143855885176, 0.2753173411346297]]], + 'S': [[0, [77.3837213998, 16.887417337354126]], [0, [4.24286242552, 5.806114862351204]], [0, [0.19956804901, 0.42411499313349615]]], + 'Cl': [[0, [87.9791594478, 18.69938754757193]], [0, [4.96724533871, 6.585294351730177]], [0, [0.25861139087, 0.5996439927014025]]], + 'Ar': [[0, [98.6384890866, 20.607909307338506]], [0, [5.70477691943, 7.365867515604668]], [0, [0.322389303278, 0.8050942514147619]]], + 'K': [[0, [127.304718328, 21.99381564116015]], [0, [8.14542318935, 8.914152537873168]], [0, [0.599909721308, 1.281113936405088]], [0, [0.035109044485, 0.03885492933110718]]], + 'Ca': [[0, [143.410292528, 23.911082457093176]], [0, [9.37643144182, 9.941770494312644]], [0, [0.707265349098, 1.5157353597122245]], [0, [0.0458961284132, 0.06364796065031839]]], + 'Sc': [[0, [158.994572671, 25.904442179513133]], [0, [10.6669154617, 10.883238053673036]], [0, [0.855948344081, 1.8492123402861176]], [0, [0.0663836669817, 0.10346527800168384]]], + 'Ti': [[0, [174.894134206, 27.966990953941423]], [0, [11.9900959183, 11.826374216637854]], [0, [0.99956141908, 2.2420793288691643]], [0, [0.0831227117008, 0.13681731240170136]]], + 'V': [[0, [190.604934656, 30.120471923041325]], [0, [13.3014644743, 12.749780443487515]], [0, [1.14767573276, 2.6800134943163965]], [0, [0.0996286409699, 0.17038950363938712]]], + 'Cr': [[0, [206.066074361, 32.365170842752164]], [0, [14.5931480682, 13.65262763441227]], [0, [1.30088471274, 3.1620979075567597]], [0, [0.116182713597, 0.2045615376503895]]], + 'Mn': [[0, [221.16302319, 34.70340183637225]], [0, [15.8499577468, 14.53121460187906]], [0, [1.45886982985, 3.6870872736982983]], [0, [0.132849408341, 0.23929909691538395]]], + 'Fe': [[0, [236.275958236, 37.13636549602007]], [0, [17.0021924689, 
15.423416039160818]], [0, [1.58039830539, 4.245713892974994]], [0, [0.138154959446, 0.24497443207694142]]], + 'Co': [[0, [251.266826314, 39.654913061172365]], [0, [18.1259810867, 16.297312916228524]], [0, [1.72000963703, 4.847287712375843]], [0, [0.146811845647, 0.2591203258656231]]], + 'Ni': [[0, [265.85589001, 42.264044242431645]], [0, [19.1913776924, 17.14653742887106]], [0, [1.87127991906, 5.488214209582658]], [0, [0.15768951119, 0.278567741559687]]], + 'Cu': [[0, [279.89558372, 44.96377471419826]], [0, [20.1795701285, 17.969392784841585]], [0, [2.0307546413, 6.165773142025554]], [0, [0.170101920073, 0.30155942534387087]]], + 'Zn': [[0, [293.316698633, 47.75086136824285]], [0, [21.0782849144, 18.76770516482975]], [0, [2.19628414783, 6.877297576296975]], [0, [0.183611811011, 0.32702068243576027]]], + 'Ga': [[0, [223.351407737, 51.85980692866955]], [0, [13.8991890472, 16.536719699834574]], [0, [1.79766451577, 5.726042077556815]], [0, [0.101436081792, 0.1668401485879247]]], + 'Ge': [[0, [246.769907155, 54.68127699607652]], [0, [15.4889775395, 17.96144762660623]], [0, [2.02770427595, 6.416697553920722]], [0, [0.121093071072, 0.22751860302804602]]], + 'As': [[0, [270.61373978, 57.564175496815935]], [0, [17.0703038187, 19.413617521179415]], [0, [2.25845376191, 7.089265265044681]], [0, [0.143577215473, 0.3015460843175103]]], + 'Se': [[0, [300.848605175, 60.466799135862026]], [0, [19.2537085333, 21.03082294851702]], [0, [2.56276392363, 7.936692573640534]], [0, [0.179389676325, 0.42001811250289295]]], + 'Br': [[0, [328.797262949, 63.43810791545978]], [0, [21.1924514002, 22.57846913638884]], [0, [2.85204353045, 8.701657933885878]], [0, [0.215086275684, 0.5517188564364737]]], + 'Kr': [[0, [355.214174754, 66.5014860746436]], [0, [22.9403228854, 24.090357342766612]], [0, [3.13032037939, 9.401485056551708]], [0, [0.251740278777, 0.6984900695917353]]], + 'Rb': [[0, [514.90443499, 66.45387731817529]], [0, [39.0691107041, 29.02209698864997]], [0, [4.80426316714, 
13.141490959616895]], [0, [0.563096338616, 1.4122294089225085]], [0, [0.0437868695146, 0.06021000634919698]]], + 'Sr': [[0, [543.738995237, 69.61302407200955]], [0, [41.128661972, 30.45525713468061]], [0, [5.13125190812, 14.000583603948632]], [0, [0.584163639475, 1.502072088846565]], [0, [0.0457473904015, 0.07193642805575438]]], + 'Y': [[0, [573.865438818, 72.8256986346715]], [0, [43.3418373882, 31.888048066550517]], [0, [5.52448276023, 14.85533123439925]], [0, [0.639200518782, 1.6690156488501313]], [0, [0.0565895915557, 0.0999066215124533]]], + 'Zr': [[0, [618.872151787, 75.68769297760856]], [0, [47.1806876039, 33.6719208912133]], [0, [6.08833013219, 15.986254333511788]], [0, [0.716501829036, 1.9375575902411635]], [0, [0.0713273733558, 0.13612560832942924]]], + 'Nb': [[0, [659.649771305, 78.78777525481571]], [0, [50.4574540637, 35.35272825043895]], [0, [6.61550206936, 17.019827159386775]], [0, [0.782993768027, 2.209716527541788]], [0, [0.0844791372764, 0.16887885100075942]]], + 'Mo': [[0, [699.220087037, 82.03480488866288]], [0, [53.5109266255, 37.0105613980275]], [0, [7.13722099954, 18.0118404058805]], [0, [0.84688911897, 2.5015703606076256]], [0, [0.0970485492451, 0.20007447901252992]]], + 'Tc': [[0, [738.224099414, 85.40820605841263]], [0, [56.4145863596, 38.665323565768624]], [0, [7.65973260957, 18.97333333242976]], [0, [0.910214479934, 2.8171999934246834]], [0, [0.109178173374, 0.22976890861393806]]], + 'Ru': [[0, [778.546434184, 88.81655493493976]], [0, [59.5272531305, 40.349976300677305]], [0, [8.19597691306, 20.019036121376814]], [0, [0.94324592826, 3.1566525285906963]], [0, [0.110016658835, 0.22366236275456197]]], + 'Rh': [[0, [823.569257154, 92.20831582218796]], [0, [63.1039857198, 42.120036402905754]], [0, [8.80656745978, 21.13062914191657]], [0, [0.997443318264, 3.555621798417296]], [0, [0.114146789352, 0.22823056867238134]]], + 'Pd': [[0, [870.785579213, 95.65527378041557]], [0, [66.8564686032, 43.92776585899257]], [0, [9.45937938438, 
22.26339050610234]], [0, [1.06293161212, 3.9977377213753678]], [0, [0.120329936612, 0.2388496753106151]]], + 'Ag': [[0, [919.269642253, 99.18450014865658]], [0, [70.6762222363, 45.7571802751057]], [0, [10.1425268095, 23.40046850321277]], [0, [1.13573760258, 4.47761428381862]], [0, [0.12791270815, 0.25334823717250327]]], + 'Cd': [[0, [968.604184045, 102.80825873069548]], [0, [74.5132242078, 47.60242594900452]], [0, [10.8507700928, 24.534003350659496]], [0, [1.21398140348, 4.993379192403898]], [0, [0.136511946916, 0.27055997696052425]]], + 'In': [[0, [843.405124708, 110.96520306427927]], [0, [57.7349383985, 47.01539915999735]], [0, [9.2861062863, 21.19104807232844]], [0, [1.10262305907, 4.734376047579965]], [0, [0.0871639505153, 0.16198129973968295]]], + 'Sn': [[0, [884.286663849, 114.96679724832346]], [0, [60.3792607664, 48.9682687896402]], [0, [9.80580254299, 22.0195379814388]], [0, [1.1950856195, 5.126927789028787]], [0, [0.0980413909659, 0.2050113690611363]]], + 'Sb': [[0, [928.446756666, 119.0462561998037]], [0, [63.2301122301, 51.01110432146532]], [0, [10.334859026, 22.896873446536297]], [0, [1.2864338161, 5.510960266352291]], [0, [0.110597164197, 0.2557490251201422]]], + 'Te': [[0, [1006.91297059, 122.5086188723354]], [0, [69.9563984769, 53.37878173194244]], [0, [11.3710909629, 24.795357618473698]], [0, [1.42210784675, 6.046549960417364]], [0, [0.134157895373, 0.3465874814433826]]], + 'I': [[0, [1071.96922308, 126.39246353979226]], [0, [75.0210569929, 55.63865377654407]], [0, [12.2012216823, 26.216841337450926]], [0, [1.5460507813, 6.517948997414104]], [0, [0.156539597363, 0.44315588522145566]]], + 'Xe': [[0, [1130.33961713, 130.57341813447323]], [0, [79.1625523503, 57.86111406775242]], [0, [12.9165161164, 27.39407258678573]], [0, [1.66262421031, 6.948688659111207]], [0, [0.178743239447, 0.5471697476001655]]], + 'Cs': [[0, [1428.6078292, 128.82899897630176]], [0, [112.812659333, 62.53067152407603]], [0, [17.1798768915, 34.227961935433065]], [0, [2.21568367858, 
8.479278930585247]], [0, [0.337292691131, 1.0003736377349504]], [0, [0.0255963992844, 0.03279972807297557]]], + 'Ba': [[0, [1523.81269929, 132.33374253222377]], [0, [121.683219116, 64.98719011675838]], [0, [18.5377225373, 36.123991347012996]], [0, [2.42239198451, 9.151849111983358]], [0, [0.37464487939, 1.1313217747067679]], [0, [0.0318322692155, 0.05088369770736389]]], + 'La': [[0, [1601.41213596, 136.45908437706606]], [0, [128.13744821, 67.27340937055364]], [0, [19.6176760354, 37.63312362169784]], [0, [2.60445015787, 9.70137042264615]], [0, [0.410586721293, 1.2723058645673604]], [0, [0.0407833256045, 0.07329654046194856]]], + 'Ce': [[0, [1658.47048562, 141.23225188668434]], [0, [132.175071965, 69.19731594670756]], [0, [20.5059445332, 38.7164904908682]], [0, [2.74661595209, 10.37163705261105]], [0, [0.44440565431, 1.4015607510299235]], [0, [0.0447234791579, 0.08190699460978135]]], + 'Pr': [[0, [1715.26288637, 146.15656574406643]], [0, [136.051409723, 71.1296245077095]], [0, [21.3888165255, 39.75113375394901]], [0, [2.89289683236, 11.083749067340108]], [0, [0.479036909221, 1.535278321067373]], [0, [0.0484113375519, 0.09010431008530405]]], + 'Nd': [[0, [1771.7679871, 151.23596000355406]], [0, [139.763308576, 73.0683098482763]], [0, [22.2678147838, 40.737595040425745]], [0, [3.04417565147, 11.835768086134143]], [0, [0.514725872587, 1.674859379548526]], [0, [0.051944834864, 0.09809792639764876]]], + 'Pm': [[0, [1827.66270931, 156.48142428705057]], [0, [143.259770003, 75.01290712875812]], [0, [23.1358417402, 41.66771534375595]], [0, [3.199640873, 12.62445386651362]], [0, [0.55121096113, 1.8197816596901466]], [0, [0.0553253104444, 0.10584644293996817]]], + 'Sm': [[0, [1882.00387559, 161.91851435984526]], [0, [146.409075469, 76.95898454189155]], [0, [23.9752478278, 42.51900002843879]], [0, [3.35734325422, 13.444573455091518]], [0, [0.58792965549, 1.9681143031964388]], [0, [0.0585081846119, 0.1131824248041495]]], + 'Eu': [[0, [1932.75854719, 167.6002366798858]], [0, 
[148.934876865, 78.89578945074894]], [0, [24.7493435866, 43.242193431261235]], [0, [3.51366553918, 14.286616540326525]], [0, [0.623904660384, 2.115867151197456]], [0, [0.0614016764838, 0.1198007548367038]]], + 'Gd': [[0, [1975.59820781, 173.62835263535527]], [0, [150.267478001, 80.80228887933148]], [0, [25.3812097224, 43.730413811304885]], [0, [3.66178046197, 15.131436422018986]], [0, [0.657389398602, 2.2552290399910286]], [0, [0.063837005797, 0.1251673713840444]]], + 'Tb': [[0, [2043.29310958, 179.2037445859498]], [0, [154.366907789, 83.0695482866181]], [0, [26.2243318766, 44.71490580298028]], [0, [3.7689949755, 16.120004675463715]], [0, [0.663323872601, 2.2895066251146168]], [0, [0.0627463378236, 0.12027956081283321]]], + 'Dy': [[0, [2104.40136331, 185.1021613728763]], [0, [157.480965804, 85.29961820416767]], [0, [26.9609319436, 45.495087853775985]], [0, [3.88372694635, 17.1127505005226]], [0, [0.672078437777, 2.338126517300418]], [0, [0.0616039775116, 0.1156643841617586]]], + 'Ho': [[0, [2154.45469909, 191.42238595600696]], [0, [159.026925085, 87.48357952837262]], [0, [27.5027486946, 45.95633835932799]], [0, [3.99631996625, 18.08974262324885]], [0, [0.680954465184, 2.3885052181436106]], [0, [0.060173741041, 0.1106488289229826]]], + 'Er': [[0, [2190.94335259, 198.20792842426218]], [0, [158.679181523, 89.64609353716017]], [0, [27.7793550439, 46.00785816969023]], [0, [4.10121508569, 19.03368366564328]], [0, [0.688486025083, 2.4335494796515578]], [0, [0.0583244945524, 0.10492614812539582]]], + 'Tm': [[0, [2214.48978734, 205.42335133632005]], [0, [156.520247439, 91.85354642909319]], [0, [27.7527332692, 45.61394284204278]], [0, [4.19629401273, 19.931108844198004]], [0, [0.69439592841, 2.4709679915183504]], [0, [0.0560809223751, 0.09861920356341443]]], + 'Yb': [[0, [2229.17694813, 212.95081901943223]], [0, [153.095425598, 94.19509432925543]], [0, [27.4346946752, 44.83036127743994]], [0, [4.28220834088, 20.774850510083816]], [0, [0.699283130104, 2.502096031620723]], [0, 
[0.0536017037618, 0.09215131893680198]]], + 'Lu': [[0, [2240.76323234, 220.64120470655195]], [0, [149.186347076, 96.73527206213477]], [0, [26.882917845, 43.799061576056026]], [0, [4.36078494051, 21.56356820520379]], [0, [0.703938851713, 2.529434335353653]], [0, [0.0510727969004, 0.08594615780727508]]], + 'Hf': [[0, [2597.75786792, 221.4984988672269]], [0, [188.512068741, 99.90153962759624]], [0, [33.1238337682, 51.910671812884125]], [0, [4.95610698524, 24.075881834623832]], [0, [0.840126255104, 3.0993574039786624]], [0, [0.0754290091254, 0.14830089683149864]]], + 'Ta': [[0, [2835.12187643, 224.97090244390182]], [0, [212.567396461, 103.35145847528267]], [0, [36.6312660085, 56.10051046630536]], [0, [5.37947393132, 25.81036889369674]], [0, [0.929151975174, 3.4952140173242676]], [0, [0.0934277241824, 0.20126674008286047]]], + 'W': [[0, [3025.30468794, 229.72846917871507]], [0, [230.276064677, 106.62250063664358]], [0, [39.2800873493, 59.00619946134962]], [0, [5.75689567535, 27.33479438233774]], [0, [1.00224682037, 3.845156936516533]], [0, [0.109770850673, 0.2527564765722409]]], + 'Re': [[0, [3195.61889568, 235.18655723502508]], [0, [245.095364506, 109.79052780485154]], [0, [41.5829628586, 61.378152855515665]], [0, [6.12180336884, 28.778519952039215]], [0, [1.06873134848, 4.1880086644629335]], [0, [0.12566089767, 0.3051098407321521]]], + 'Os': [[0, [3269.8476374, 242.79909040960803]], [0, [247.725409062, 112.31392244158873]], [0, [42.3817611068, 62.06265168735337]], [0, [6.3443841081, 29.935902842018656]], [0, [1.06629631325, 4.327346089905066]], [0, [0.129797938525, 0.311692170282036]]], + 'Ir': [[0, [3376.87667402, 250.01523976524726]], [0, [254.046404976, 115.1096116274901]], [0, [43.6319269616, 63.23965630985516]], [0, [6.61971894051, 31.182376053342995]], [0, [1.08166412391, 4.55923508274026]], [0, [0.135494138077, 0.32374501274030015]]], + 'Pt': [[0, [3503.10050982, 257.11908266124726]], [0, [262.371870877, 118.06427561966308]], [0, [45.1400059471, 
64.68067709627911]], [0, [6.9289515116, 32.48110151528869]], [0, [1.1081209327, 4.853290082621408]], [0, [0.142293514583, 0.3395090236627465]]], + 'Au': [[0, [3642.37385874, 264.24694689878606]], [0, [271.922734082, 121.13071183685847]], [0, [46.8125693251, 66.2730681995686]], [0, [7.26324418491, 33.8141784669834]], [0, [1.14228427509, 5.19507983319498]], [0, [0.149995980588, 0.358211807740868]]], + 'Hg': [[0, [3791.49387942, 271.4719140255812]], [0, [282.287772429, 124.2856953523547]], [0, [48.5988727445, 67.95563981054985]], [0, [7.61787984362, 35.17197803197935]], [0, [1.18217257465, 5.576807483854668]], [0, [0.158470192072, 0.37936980854301705]]], + 'Tl': [[0, [2936.57544261, 296.6310154714052]], [0, [169.521034982, 128.06139769860988]], [0, [27.7278154244, 47.591482085503884]], [0, [6.18159381819, 28.12073706100974]], [0, [0.937899869214, 4.530828450871838]], [0, [0.0878953169292, 0.17357741811050326]]], + 'Pb': [[0, [3009.77971351, 305.25255358884556]], [0, [171.15240957, 131.55698757943694]], [0, [27.3362555847, 48.331254479828324]], [0, [6.28851042826, 28.17024630706816]], [0, [0.974528662433, 4.702910510256614]], [0, [0.0938717769309, 0.2034350353550231]]], + 'Bi': [[0, [3115.8496148, 313.8093294029781]], [0, [175.822879115, 134.97757970713243]], [0, [27.7224718639, 49.668864571379345]], [0, [6.46561267924, 28.69206651328442]], [0, [1.01344607802, 4.8981241805735936]], [0, [0.101357420858, 0.238854699432195]]], + 'Po': [[0, [3389.7826553, 320.66212678156774]], [0, [197.70471693, 137.6782930250117]], [0, [32.7684432058, 53.54264845480374]], [0, [7.20421362633, 32.282004781869944]], [0, [1.12137629162, 5.45713798816872]], [0, [0.122511534693, 0.3260853416866197]]], + 'At': [[0, [3615.64824975, 328.23201750107296]], [0, [214.627481704, 140.75596703512562]], [0, [36.212401456, 56.77561235809897]], [0, [7.72251960097, 34.59419860541525]], [0, [1.20498118619, 5.86665441892619]], [0, [0.140743435239, 0.4111765886798949]]], + 'Rn': [[0, [3822.43609476, 
336.350129170213]], [0, [229.112932587, 144.07512932334393]], [0, [38.9069689318, 59.56038725013068]], [0, [8.15670864482, 36.44005731475775]], [0, [1.27699233159, 6.209274469973415]], [0, [0.157920555428, 0.49843705889064177]]], + 'Fr': [[0, [5482.49524373, 319.6645485275319]], [0, [423.079084959, 150.95938898566172]], [0, [69.2009027293, 87.27661177977454]], [0, [10.8608508924, 46.439279301313775]], [0, [1.90646535073, 8.470741952248504]], [0, [0.346917622534, 1.1656662095666177]], [0, [0.0307550493062, 0.04426509178665867]]], + 'Ra': [[0, [5824.22217495, 326.4339512907625]], [0, [453.52449169, 155.42009100679874]], [0, [73.3773009492, 90.8040545340973]], [0, [11.464490496, 48.34987083896135]], [0, [2.03967356151, 9.0403262759412]], [0, [0.371641031126, 1.2738484791994147]], [0, [0.0352213028729, 0.0608041346622409]]], + 'Ac': [[0, [6036.5236009, 335.7796360977328]], [0, [467.073213456, 159.12991928763344]], [0, [75.5296659631, 92.69417995252799]], [0, [11.8777377991, 49.82342721622108]], [0, [2.11757248944, 9.395644277773304]], [0, [0.381340313268, 1.3369398526252363]], [0, [0.0383652274726, 0.07276437615219129]]], + 'Th': [[0, [5781.02789712, 352.98106098074595]], [0, [422.978272928, 160.16991432703165]], [0, [70.6991828424, 88.76304121503837]], [0, [11.6767574085, 49.81457199002962]], [0, [1.97848007739, 9.160629598977392]], [0, [0.331860385876, 1.1973801004966318]], [0, [0.026439636441, 0.0440542374768854]]], + 'Pa': [[0, [6470.70044306, 355.6321452303036]], [0, [493.225990334, 166.6023122006732]], [0, [79.745338563, 96.26732853658515]], [0, [12.7459791331, 52.8675409376038]], [0, [2.24462222504, 10.084915235025559]], [0, [0.426863180027, 1.5811923530178016]], [0, [0.046081696294, 0.09023405892146073]]], + 'U': [[0, [6677.40964538, 366.3604410199135]], [0, [503.945262015, 170.3320215371999]], [0, [81.5371062053, 97.81121245233369]], [0, [13.1666520998, 54.33056511073182]], [0, [2.29389178189, 10.443579781479114]], [0, [0.446656456574, 1.692621958244384]], [0, 
[0.0486532127186, 0.09517496550595222]]], + 'Np': [[0, [6876.97988756, 377.6727808046505]], [0, [512.988616603, 174.062836818793]], [0, [83.094841748, 99.18659693335287]], [0, [13.577303204, 55.73888881006303]], [0, [2.33602765587, 10.819245703537456]], [0, [0.463908300107, 1.7929280737098292]], [0, [0.0503407579225, 0.09771182678295015]]], + 'Pu': [[0, [7073.46467092, 389.4767458720515]], [0, [520.913132774, 177.82649815105037]], [0, [84.4759601259, 100.44506864340687]], [0, [13.9835865192, 57.10032981294112]], [0, [2.37396977934, 11.219928664816145]], [0, [0.479267778382, 1.8834054685337047]], [0, [0.0513447915011, 0.09846227430507107]]], + 'Am': [[0, [7268.46090414, 401.81608908117465]], [0, [527.78044655, 181.63697263440923]], [0, [85.6760899888, 101.59181640122917]], [0, [14.3850956892, 58.408919501602334]], [0, [2.40863549995, 11.647846175973653]], [0, [0.492763833865, 1.9631974676672352]], [0, [0.0517261402691, 0.0976668611720969]]], + 'Cm': [[0, [7461.07270308, 414.6785096263954]], [0, [533.554758233, 185.49522592946025]], [0, [86.6825832446, 102.6241476347959]], [0, [14.7808918438, 59.65650561763819]], [0, [2.44083150917, 12.104436105449656]], [0, [0.504445261811, 2.0318083591637333]], [0, [0.05153336688, 0.09554389071702431]]], + 'Bk': [[0, [7834.61665484, 425.45484503199054]], [0, [559.731032607, 190.24065973922345]], [0, [90.3490076906, 105.5123797523743]], [0, [15.4132069342, 61.660011790470044]], [0, [2.50033510705, 12.766578648339491]], [0, [0.518065291853, 2.1191379396217878]], [0, [0.0502100687403, 0.09121645646373851]]], + 'Cf': [[0, [8201.72850818, 436.9331462249914]], [0, [584.091204457, 194.97482137107826]], [0, [93.761139844, 108.17900966131799]], [0, [16.0367739256, 63.59285048454238]], [0, [2.55798346418, 13.447343479938588]], [0, [0.530648155402, 2.197810376070656]], [0, [0.0488334639026, 0.08694654953904225]]], + 'Es': [[0, [8568.60789377, 449.0720813904205]], [0, [607.252250672, 199.7280083883994]], [0, [96.9993255946, 
110.69452133675912]], [0, [16.6586969315, 65.47695911971626]], [0, [2.61542700505, 14.151859950374625]], [0, [0.542629123426, 2.2703523785804616]], [0, [0.0474279857112, 0.0828159494077535]]], + 'Fm': [[0, [8937.95057321, 461.81915524070774]], [0, [629.550428909, 204.51466732620824]], [0, [100.107987123, 113.0978434843216]], [0, [17.2830930058, 67.32473882802411]], [0, [2.67360927633, 14.882691359072656]], [0, [0.554298517748, 2.3385277257658816]], [0, [0.0460161264899, 0.07887980370375362]]], + 'Md': [[0, [9312.72294426, 475.2053390594495]], [0, [651.169079559, 209.34968317990007]], [0, [103.107616035, 115.4101662912288]], [0, [17.9118103054, 69.14191004315032]], [0, [2.73292190404, 15.640711247342152]], [0, [0.565798602387, 2.4032865063709417]], [0, [0.0446097028095, 0.07515373722237267]]], + 'No': [[0, [9692.47931286, 489.2442112346279]], [0, [672.034303671, 214.23197488829172]], [0, [105.988550516, 117.63038685999115]], [0, [18.5442146559, 70.92646079341314]], [0, [2.7932429022, 16.425417198806798]], [0, [0.577107028367, 2.4645969790388342]], [0, [0.0432042120982, 0.07161106318788321]]], + } - acfile_default = './parameters_HF.dat' - - def __init__(self, fname=None, parameters='HF'): - self.get_basis(fname, parameters) - - def renormalize(self, a): - # 1/norm1 = \int \exp(-a*r^2) d^3 r => norm1 = (a/pi)^(3/2) - # 1/norm2^2 = \int (\exp(-a*r^2))^2 d^3 r => norm2 = (2.0*a/pi)^(3/4) - # coefficient = norm1 / norm2 = (0.5*a/pi)^(3/4) - x = np.sqrt(np.sqrt(0.5*a/np.pi)) - return x*x*x - - def read_ac(self, fname): - if fname is None: - fname = self.acfile_default - with open(fname) as f: - lines = f.readlines() - basis = {'H': []} - il=0 - while il 0: - zrest = zcore - bad_idx = [] - for iprim in range(len(acbasis[q])): - if np.isclose(zrest, 0): - break - a, c = acbasis[q][iprim][1] - renorm = self.renormalize(a) - c /= renorm # convert back to charge units: sum {c} == charge(q) - dc = min(c, zrest) - if np.isclose(c, dc): - bad_idx.append(iprim) - else: - 
acbasis[q][iprim][1][1] = (c-dc)*renorm - zrest -= dc - for i in bad_idx[::-1]: - acbasis[q].pop(i) - return acbasis - - def get_auxweights(self, auxmol): - w = np.zeros(auxmol.nao) - iao = 0 - for iat in range(auxmol.natm): - q = auxmol._atom[iat][0] - for prim in auxmol._basis[q]: - w[iao] = prim[1][1] - iao+=1 - return w - - def merge_caps(self, w, eri3c): - return np.einsum('...i,i->...', eri3c, w) - - def get_eri3c(self, mol, auxmol): - pmol = mol + auxmol - shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) - eri3c = pmol.intor('int3c2e_sph', shls_slice=shls_slice) - return eri3c - - def check_coefficients(self, mol, acbasis): - ch1 = sum(sum(c/self.renormalize(a) for _, (a, c) in acbasis[mol.atom_pure_symbol(iat)]) for iat in range(mol.natm)) - ch2 = sum(mol.atom_charges()) - (mol.charge if self.parameters == 'HF' else 0) - if not np.isclose(ch1, ch2): - raise RuntimeError("Coefficients corrupted after adding ECP") - - def HLB20(self, mol): - acbasis = self.use_charge(mol) - if mol.has_ecp(): - acbasis = self.use_ecp(mol, acbasis) - self.check_coefficients(mol, acbasis) - - auxmol = df.make_auxmol(mol, acbasis) - eri3c = self.get_eri3c(mol, auxmol) - auxw = self.get_auxweights(auxmol) - return self.merge_caps(auxw, eri3c) - - def Heff(self, mol): - self.mol = mol - self.Hcore = scf.hf.get_hcore(mol) - self.H = self.Hcore + self.HLB20(mol) - return self.H - - - def HLB20_ints_generator(self, mol, auxmol): - pmol = mol + auxmol - shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) - eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) - eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) - aoslices = mol.aoslice_by_atom()[:,2:] - auxaoslices = auxmol.aoslice_by_atom()[:,2:] - def HLB20_ints_deriv(iat): - p0, p1 = aoslices[iat] - P0, P1 = auxaoslices[iat] - eri3c2e_ip = np.zeros_like(eri3c2e_ip1) - eri3c2e_ip[:,p0:p1,:,:] += eri3c2e_ip1[:,p0:p1,:,:] - 
eri3c2e_ip[:,:,p0:p1,:] += eri3c2e_ip1[:,p0:p1,:,:].transpose((0,2,1,3)) - eri3c2e_ip[:,:,:,P0:P1] += eri3c2e_ip2[:,:,:,P0:P1] - return -eri3c2e_ip - return HLB20_ints_deriv - - def HLB20_generator(self, mol): - acbasis = self.use_charge(mol) - if mol.has_ecp(): - acbasis = self.use_ecp(mol, acbasis) - self.check_coefficients(mol, acbasis) - auxmol = df.make_auxmol(mol, acbasis) - eri3c = self.HLB20_ints_generator(mol, auxmol) - auxw = self.get_auxweights(auxmol) - def HLB20_deriv(iat): - return self.merge_caps(auxw, eri3c(iat)) - return HLB20_deriv + self._hf_basis = {'H': [], + 'He': [[0, [1.8865345899608519, 0.4056146926108746]]], + 'Li': [[0, [1.9854870701524918, 0.842937532901041]]], + 'Be': [[0, [4.744586184977778, 1.3574437702689057]], [0, [0.2792470137084066, 0.12818229520909]]], + 'B': [[0, [6.0338581393756145, 2.094637525409216]], [0, [0.2296652845463048, 0.1538820056563987]]], + 'C': [[0, [8.36842382629919, 2.912335066987576]], [0, [0.3175823851018592, 0.2825906903498745]]], + 'N': [[0, [10.93399949627562, 3.848864491590766]], [0, [0.43457823405570917, 0.4666119106370673]]], + 'O': [[0, [13.822779569568999, 4.823227937581987]], [0, [0.6163807631542392, 0.7589805047258943]]], + 'F': [[0, [16.696221288447184, 5.913626376015676]], [0, [0.8069674335184295, 1.1067332169360984]]], + 'Ne': [[0, [19.44766524633368, 7.113317051280908]], [0, [1.0081157441421305, 1.508827408605945]]], + 'Na': [[0, [22.043514485429395, 8.505970515543133]], [0, [1.0688208368282481, 1.7698621680543754]]], + 'Mg': [[0, [35.68089579776235, 9.370720219473146]], [0, [2.9023990296953044, 2.762905547578002]], [0, [0.391911845854857, 0.4395524604043637]]], + 'Al': [[0, [34.328377368288, 11.458430900825965]], [0, [1.895391976451897, 2.8294008855907764]], [0, [0.12243916188522636, 0.09609543557391674]]], + 'Si': [[0, [40.1763529442365, 13.078588758018803]], [0, [2.239495255980109, 3.340046848460941]], [0, [0.13204220229571037, 0.13836780632028686]]], + 'P': [[0, [46.66493733746877, 
14.784964733718208]], [0, [2.6279568276824814, 3.8907484994126014]], [0, [0.1594036030260791, 0.2054997181258127]]], + 'S': [[0, [54.215297785332154, 16.531442577888246]], [0, [3.167647315145373, 4.489750969396064]], [0, [0.22671769463490918, 0.3487263502408896]]], + 'Cl': [[0, [62.03053259370884, 18.377558847158248]], [0, [3.700397336007754, 5.10183267645642]], [0, [0.28974576291563425, 0.5086598918385119]]], + 'Ar': [[0, [70.09781762916084, 20.327210911919742]], [0, [4.219331463603571, 5.7261201541458835]], [0, [0.35198503878294074, 0.6852541804555327]]], + 'K': [[0, [81.43333911489003, 22.298648478534886]], [0, [4.962141983102695, 6.635182027193869]], [0, [0.3731894290462584, 0.8199734170834927]]], + 'Ca': [[0, [74.41569962693416, 24.64109121546901]], [0, [4.071511193479644, 6.221216302620745]], [0, [0.28671657971452114, 0.6443784267175638]]], + 'Sc': [[0, [101.20791061796358, 26.412487878859128]], [0, [6.446723728431389, 8.03204205372517]], [0, [0.7834045065030468, 1.0165590397404942]], [0, [0.22063826662337555, 0.32378518633489617]]], + 'Ti': [[0, [114.39209603243822, 28.397321564939656]], [0, [7.751031064083496, 8.705787116537461]], [0, [1.3409079006639175, 1.4131635450481188]], [0, [0.29105515283701394, 0.5831178684215473]]], + 'V': [[0, [128.16680647954118, 30.409558254731866]], [0, [9.216695666616227, 9.327297806386307]], [0, [1.858353853141026, 2.0326669623504348]], [0, [0.3403321818150978, 0.7596484454818172]]], + 'Cr': [[0, [142.77227533657648, 32.44721271300131]], [0, [10.841336380933656, 9.962832486421375]], [0, [2.324791884735429, 2.760501995261298]], [0, [0.386053542334779, 0.9188917102547013]]], + 'Mn': [[0, [158.3329221300052, 34.5079863370424]], [0, [12.618932464144514, 10.650940881473641]], [0, [2.751317714668748, 3.5518867608144586]], [0, [0.4313986524269153, 1.0757402574568604]]], + 'Fe': [[0, [171.59971217722958, 36.95724348896622]], [0, [12.84888931599388, 12.016251508544268]], [0, [2.4208579434027238, 3.549284884553206]], [0, 
[0.44179048296062723, 1.0407595913547492]]], + 'Co': [[0, [186.74822291999737, 39.328592943410854]], [0, [13.74313677181651, 13.158002775580817]], [0, [2.3981274633981866, 3.9707170856647522]], [0, [0.4585307413973846, 1.0414789524987433]]], + 'Ni': [[0, [203.1786382537903, 41.69876688913565]], [0, [14.904507246734576, 14.254460551474274]], [0, [2.50518240559692, 4.554224625007843]], [0, [0.4821723138309187, 1.0776113454467862]]], + 'Cu': [[0, [220.69456355904438, 44.09047093492658]], [0, [16.228087185242245, 15.352003273713255]], [0, [2.6799011471909333, 5.235665891996169]], [0, [0.5116345189692049, 1.1415411696644089]]], + 'Zn': [[0, [239.22585729085247, 46.51256599644878]], [0, [17.67628445660993, 16.466826311793856]], [0, [2.8967970112707353, 5.996032281556834]], [0, [0.5457841216300788, 1.2269377067442289]]], + 'Ga': [[0, [207.77682210003923, 50.56169123112327]], [0, [11.856343239908128, 17.3168271457457]], [0, [1.435457236821561, 4.342639055962598]], [0, [0.17389779129425226, 0.1653080552415563]]], + 'Ge': [[0, [225.24685392781288, 53.26905157611517]], [0, [12.852764572486532, 18.78877288270596]], [0, [1.5215113800917213, 4.764856999241978]], [0, [0.1323014777214065, 0.14240658459873975]]], + 'As': [[0, [247.9086110252744, 56.028840702070525]], [0, [14.300295034709391, 20.402255662442446]], [0, [1.6861444954415892, 5.341032330752718]], [0, [0.13416499137137874, 0.173484907424891]]], + 'Se': [[0, [273.8649943966164, 58.799515164217304]], [0, [16.08110012602841, 22.06344368325976]], [0, [1.932031883742216, 6.021612670882894]], [0, [0.1792331056182616, 0.28093949286892783]]], + 'Br': [[0, [300.01299028853975, 61.61553868303424]], [0, [17.856104656403406, 23.74892022762549]], [0, [2.1685919518796943, 6.686153971304707]], [0, [0.21670708306814743, 0.39290870861189514]]], + 'Kr': [[0, [326.6902386555732, 64.48778748553242]], [0, [19.647882619248065, 25.46975468620672]], [0, [2.3989880465652473, 7.3449776683736046]], [0, [0.2510302844453139, 0.5108510489663171]]], + 
'Rb': [[0, [371.63737826238116, 67.11280281784138]], [0, [23.248544533094694, 27.589207985709436]], [0, [2.8726598551454177, 8.65313863037671]], [0, [0.2777391154691928, 0.6698990577646332]]], + 'Sr': [[0, [370.72341161894167, 70.53669290120207]], [0, [22.369820917706722, 28.689970312925873]], [0, [2.74311782634614, 8.459944450402055]], [0, [0.2267095844986972, 0.568426058335472]]], + 'Y': [[0, [381.9433918083273, 73.79627223351419]], [0, [22.657005971801517, 30.132831274164765]], [0, [2.78181561035276, 8.545479781705756]], [0, [0.23142082707081157, 0.6180042289006495]]], + 'Zr': [[0, [404.12975489707895, 77.02187364839594]], [0, [23.913120490512053, 31.876617008220126]], [0, [2.928122394174498, 8.966554439771224]], [0, [0.2509660177582832, 0.715944254052498]]], + 'Nb': [[0, [432.000753226581, 80.2736752206378]], [0, [25.649609677505364, 33.771572054045144]], [0, [3.127263691497556, 9.56516637789088]], [0, [0.27614878489011474, 0.8386582279030884]]], + 'Mo': [[0, [462.68918558642525, 83.56383349210992]], [0, [27.620091978879955, 35.74744481883726]], [0, [3.350241541886852, 10.246548405106552]], [0, [0.3046372235349947, 0.9794709766343448]]], + 'Tc': [[0, [495.0483050672117, 86.89968164312515]], [0, [29.724523813339573, 37.779386272068194]], [0, [3.584071928719099, 10.970538625909704]], [0, [0.3354202932589703, 1.135308320005783]]], + 'Ru': [[0, [507.88917516825205, 90.49349034803686]], [0, [30.119638007448955, 39.47115792065111]], [0, [3.5989334168125757, 11.01711542201227]], [0, [0.36368976141299975, 1.2658304433949406]]], + 'Rh': [[0, [524.9638375058429, 94.07757110910124]], [0, [30.919224799048706, 41.29611262024772]], [0, [3.6440391113842647, 11.211906105364292]], [0, [0.39297773558866045, 1.4040392885288977]]], + 'Pd': [[0, [545.3714847389803, 97.68710808884666]], [0, [32.017351723397205, 43.22084107574552]], [0, [3.7145429669883927, 11.526806131814698]], [0, [0.4233961197550389, 1.5506684631945555]]], + 'Ag': [[0, [568.4833402808085, 101.34010161842532]], [0, 
[33.34592493342003, 45.22617974954437]], [0, [3.805924686600645, 11.93953395818886]], [0, [0.45499468116272807, 1.7061526933158015]]], + 'Cd': [[0, [593.885889821288, 105.04578226656376]], [0, [34.86259758880326, 47.30114504155227]], [0, [3.914677789101674, 12.43424303591464]], [0, [0.48779609479059793, 1.870799445536182]]], + 'In': [[0, [745.0453334167443, 107.35066272796716]], [0, [47.58971626349325, 50.926236984161385]], [0, [5.971203828280484, 16.665850453251153]], [0, [0.7430042209193889, 3.01900686459137]], [0, [0.08466099556037841, 0.0621837900529884]]], + 'Sn': [[0, [797.0285265417332, 110.81033718024638]], [0, [51.53384351159246, 53.28995874125511]], [0, [6.509259671570793, 17.920299624455424]], [0, [0.8054405533286846, 3.3669375931477203]], [0, [0.0663261465359773, 0.06275078847805306]]], + 'Sb': [[0, [854.332635733605, 114.22836471354059]], [0, [56.03010644110775, 55.71293325193932]], [0, [7.143402312658964, 19.30403594352593]], [0, [0.8834712631942941, 3.765970580984084]], [0, [0.06733579855319452, 0.07997832797135018]]], + 'Te': [[0, [912.166800137674, 117.7227728097036]], [0, [60.59380968267101, 58.126521408848]], [0, [7.8128697037188815, 20.648839233084743]], [0, [0.9864784933135154, 4.17777092264925]], [0, [0.1013950805635164, 0.14842059360104934]]], + 'I': [[0, [971.2326574046591, 121.29884984899455]], [0, [65.25254319977338, 60.56284562091366]], [0, [8.49547368191436, 22.00152224539976]], [0, [1.0874117592443089, 4.595440718085864]], [0, [0.1269930211411724, 0.2193048890155139]]], + 'Xe': [[0, [1031.8412270302938, 124.95241815534052]], [0, [70.03990230763266, 63.02489883673915]], [0, [9.196997644894727, 23.372474126117584]], [0, [1.1869856131289065, 5.023453919720388]], [0, [0.14857777514764356, 0.29253760225088343]]], + 'Cs': [[0, [1149.3826345739149, 127.00628909302009]], [0, [82.07400302361911, 65.73151848347841]], [0, [11.211986732106572, 26.3413877290317]], [0, [1.527946233339621, 6.140575117802016]], [0, [0.20092255135604542, 
0.5358903213328864]]], + 'Ba': [[0, [1169.694775701659, 132.11174088077792]], [0, [81.77157679719838, 67.95207434814108]], [0, [11.07493150328844, 26.47785773461083]], [0, [1.5039225873956188, 6.206376352030552]], [0, [0.17008617153559455, 0.46825530317342057]]], + 'La': [[0, [1222.2841796269504, 136.44950093257452]], [0, [85.30738538527056, 70.43142971440702]], [0, [11.534893922454101, 27.525891530415866]], [0, [1.565216495786674, 6.497397763993776]], [0, [0.17220089046882703, 0.5081058416183188]]], + 'Ce': [[0, [1272.0807279952528, 141.03286793355306]], [0, [88.36657550864288, 72.8898142136249]], [0, [11.818291666733137, 28.480233667510007]], [0, [1.6071799883505355, 6.807847988718011]], [0, [0.18079491474660023, 0.5362729882922856]]], + 'Pr': [[0, [1322.1469857952939, 145.7514595496068]], [0, [91.39656432137411, 75.33333265405476]], [0, [12.1071170797223, 29.4276517750929]], [0, [1.6537813057728807, 7.163429031604369]], [0, [0.18837281505285847, 0.5591574497687]]], + 'Nd': [[0, [1372.5263581315082, 150.60889920740257]], [0, [94.38256339798501, 77.77663661280765]], [0, [12.388920046052041, 30.374947266803076]], [0, [1.7024088534425037, 7.543136623138128]], [0, [0.1952797675688985, 0.578520067184905]]], + 'Pm': [[0, [1423.2939068289443, 155.6052462723981]], [0, [97.32865783461368, 80.22405611653822]], [0, [12.661617245227912, 31.327874011441633]], [0, [1.7523980768357152, 7.939936976159989]], [0, [0.20168624380514635, 0.5952857313410442]]], + 'Sm': [[0, [1474.5487132299602, 160.73950011930427]], [0, [100.24430189239065, 82.67778973404094]], [0, [12.92552209508792, 32.292045829866765]], [0, [1.803472701882244, 8.350338175674331]], [0, [0.20769358126601012, 0.6100257919951613]]], + 'Eu': [[0, [1526.4150536618301, 166.01206305202103]], [0, [103.1400484979067, 85.1394038516512]], [0, [13.18165318229166, 33.27271244759562]], [0, [1.8554646230381475, 8.772175676339293]], [0, [0.2133674470507607, 0.6231207671624414]]], + 'Gd': [[0, [1578.9784735126057, 
171.42174791339716]], [0, [106.0250115856627, 87.60976291496155]], [0, [13.431158961640053, 34.274486275744934]], [0, [1.908240993848972, 9.203901002823144]], [0, [0.218751211476024, 0.6348308667972493]]], + 'Tb': [[0, [1627.9257815186743, 177.01885393927807]], [0, [108.58046918725347, 89.98169013002152]], [0, [13.712227614246242, 35.08177288335976]], [0, [1.9934065152409421, 9.778608254703611]], [0, [0.22607538133284424, 0.6586164748301768]]], + 'Dy': [[0, [1675.7366368653, 182.79539454026207]], [0, [110.92891605390538, 92.35169430402841]], [0, [13.944492993755349, 35.88756038236498]], [0, [2.071361368544463, 10.324371945021113]], [0, [0.23293090457243362, 0.6793043698109138]]], + 'Ho': [[0, [1722.45102323609, 188.74643520738246]], [0, [113.09508331285164, 94.71282327683552]], [0, [14.135448633292121, 36.700404016777306]], [0, [2.143446883392562, 10.84407478396342]], [0, [0.23939934892582107, 0.6974618085298848]]], + 'Er': [[0, [1768.1273694341087, 194.86460193618592]], [0, [115.1045170512384, 97.059678831912]], [0, [14.292436046614585, 37.52919176101882]], [0, [2.210644796273452, 11.34014341321473]], [0, [0.24552713219420616, 0.7134490516878607]]], + 'Tm': [[0, [1812.8387097425375, 201.1430178623664]], [0, [116.98049742273002, 99.38821488326626]], [0, [14.422182620212654, 38.38205159874525]], [0, [2.273693630826137, 11.81459993451274]], [0, [0.25134177827762766, 0.7275092899716751]]], + 'Yb': [[0, [1856.6685254316594, 207.57575706111513]], [0, [118.7434510713386, 101.69567162657735]], [0, [14.53068606589803, 39.26596911718243]], [0, [2.333173888140194, 12.269202867396743]], [0, [0.25686096532697333, 0.7398210931489899]]], + 'Lu': [[0, [1899.7375959050908, 214.15900283658507]], [0, [120.41203401518791, 103.9808323573287]], [0, [14.623290920474743, 40.186822923208226]], [0, [2.389573140054433, 12.70562314337766]], [0, [0.2620987513832905, 0.7505337675010088]]], + 'Hf': [[0, [1967.3161513254126, 220.4818242350529]], [0, [124.34022381142523, 106.59712230324918]], [0, 
[15.07308379422105, 41.593870479307554]], [0, [2.480818570417923, 13.232329324645132]], [0, [0.2661713168622087, 0.7967557336599056]]], + 'Ta': [[0, [2070.081376528822, 226.40779086022104]], [0, [131.49175287923276, 109.64042202051841]], [0, [15.99100132534389, 43.55837828988383]], [0, [2.6318773077354023, 14.102161904766191]], [0, [0.28043101980165813, 0.889950291909441]]], + 'W': [[0, [2189.549358149728, 232.19925625507935]], [0, [140.25127395857805, 112.8716890901911]], [0, [17.13344756974962, 45.783279372087385]], [0, [2.8098978707573803, 15.126150439697248]], [0, [0.29967949093936525, 1.0098967362808797]]], + 'Re': [[0, [2318.4462036806926, 237.9628858834694]], [0, [149.9937976368708, 116.1966038301959]], [0, [18.404731677310796, 48.15430966963478]], [0, [3.001608061476728, 16.225684227300484]], [0, [0.32187252568656627, 1.1489884194097733]]], + 'Os': [[0, [2388.049070715689, 245.10047388036585]], [0, [153.7734923225801, 118.7983419940856]], [0, [18.857909721345862, 49.56025601814822]], [0, [3.11008633126925, 16.704279730923567]], [0, [0.34445180147113785, 1.2819732299675402]]], + 'Ir': [[0, [2467.4297047382197, 252.25866435592235]], [0, [158.46189701303072, 121.50759831320175]], [0, [19.439726509029725, 51.14252734248185]], [0, [3.2301679883724033, 17.26079206285206]], [0, [0.36862565727337615, 1.4293825704029288]]], + 'Pt': [[0, [2554.3592830018633, 259.50162267370445]], [0, [163.79362965973422, 124.30357066368583]], [0, [20.109681026841002, 52.84983429008009]], [0, [3.3577853513983555, 17.869227647362816]], [0, [0.3940749704373637, 1.58963560402845]]], + 'Au': [[0, [2647.942367886888, 266.86258142778973]], [0, [169.65288496596833, 127.17818206724604]], [0, [20.849001035167127, 54.65811794336809]], [0, [3.490999195504156, 18.517519625282052]], [0, [0.42063212480622336, 1.761992991117359]]], + 'Hg': [[0, [2747.928134675977, 274.3568830800225]], [0, [175.9987808231551, 130.12984199024015]], [0, [21.64967483069611, 56.556629698785166]], [0, [3.6289678032455828, 
19.200696161193015]], [0, [0.44820670782959365, 1.946165274513181]]], + 'Tl': [[0, [3164.4430017902673, 275.33425125783316]], [0, [215.72530656955274, 136.5134853959493]], [0, [27.161342191636468, 62.3870818915601]], [0, [4.491427780128169, 23.28557562205648]], [0, [0.600440165078794, 2.698540051334935]], [0, [0.06853093205018684, 0.04464665376841102]]], + 'Pb': [[0, [3332.646311029052, 281.94999231460247]], [0, [229.0031090563242, 140.1437908618318]], [0, [28.975969438335593, 64.84409912856167]], [0, [4.810558525708687, 24.740295168731812]], [0, [0.6428089815920308, 2.996264069031993]], [0, [0.047924330717300866, 0.041440305792256264]]], + 'Bi': [[0, [3512.216508855337, 288.64078359604133]], [0, [243.39214593688772, 143.84037317308596]], [0, [30.97853246581632, 67.34690546839133]], [0, [5.172423589452024, 26.30220507176237]], [0, [0.6972914789511377, 3.343004331160787]], [0, [0.046483440633541435, 0.051734625570903164]]], + 'Po': [[0, [3687.56394734581, 295.7938412376767]], [0, [256.99663742660033, 147.48098812190506]], [0, [32.88525041027444, 69.69484331872636]], [0, [5.53728075165879, 27.772403578343333]], [0, [0.7669524019010989, 3.688915796422927]], [0, [0.07790941705971591, 0.10638615361580807]]], + 'At': [[0, [3867.2700297383376, 303.2212169509859]], [0, [270.7970840945, 151.14251555680508]], [0, [34.83422183513534, 72.0152741956569]], [0, [5.916332606555011, 29.262941710845208]], [0, [0.8371425774457082, 4.051982567189525]], [0, [0.10024708223935702, 0.16345538990921718]]], + 'Rn': [[0, [4051.137069750091, 310.9250988056561]], [0, [284.775937870393, 154.8229392338331]], [0, [36.824980911702696, 74.30862717101918]], [0, [6.309600846955326, 30.773810590825363]], [0, [0.9078056249178833, 4.433403377786259]], [0, [0.11834115297115232, 0.22290023511611973]]], + 'Fr': [[0, [4353.984794381789, 316.43748235638924]], [0, [315.0221897544036, 158.68006092193193]], [0, [41.78089597517104, 77.69002281082362]], [0, [7.324635183209355, 33.751487240198095]], [0, 
[1.2167680803927787, 5.5432018138923755]], [0, [0.18531646855260042, 0.5267499036883455]]], + 'Ra': [[0, [4448.350621866084, 326.7862729655624]], [0, [316.37235237572963, 162.10461390693163]], [0, [41.599655197053686, 78.94892733652733]], [0, [7.332003018858603, 34.0447122605427]], [0, [1.1859426306615157, 5.525797524492575]], [0, [0.16058344554826792, 0.4663667294265322]]], + 'Ac': [[0, [4603.153156415366, 336.6052579927967]], [0, [324.3007110669224, 165.83572180244386]], [0, [42.43724662599295, 80.83159851353724]], [0, [7.489245069769097, 34.88956929049148]], [0, [1.1874081908615182, 5.619709507030846]], [0, [0.1516443671133239, 0.4600690932932841]]], + 'Th': [[0, [4797.115103044666, 345.62274075733126]], [0, [338.2688019281645, 169.59801924262376]], [0, [44.29533540738354, 83.26599491898011]], [0, [7.831908387375935, 36.191673821065116]], [0, [1.2477414312105595, 5.98848264294172]], [0, [0.16327011497437025, 0.5198232907672228]]], + 'Pa': [[0, [4988.384347334797, 355.26683210258017]], [0, [350.79261962815326, 173.42724087767974]], [0, [45.86062629910834, 85.59403110807175]], [0, [8.127515730749716, 37.310058714683365]], [0, [1.2948293491755238, 6.333674516621873]], [0, [0.17433619558418045, 0.5704050762598124]]], + 'U': [[0, [5183.066162972329, 365.34950418100794]], [0, [362.8661720831389, 177.3353358479058]], [0, [47.296628009567854, 87.92696673647268]], [0, [8.393112085927632, 38.33948371626881]], [0, [1.3330953102037348, 6.665271903877122]], [0, [0.18401729827822358, 0.6114454029674808]]], + 'Np': [[0, [5384.543572863619, 375.8372854168172]], [0, [374.92172838433873, 181.32316439868376]], [0, [48.68255893505675, 90.29615221964652]], [0, [8.643048125493124, 39.32468442063093]], [0, [1.366773826787788, 6.999180874689732]], [0, [0.19260759242970482, 0.6449897137528064]]], + 'Pu': [[0, [5593.455128220382, 386.68279487022807]], [0, [387.1377374155312, 185.3845839171116]], [0, [50.05395085463852, 92.71346043659159]], [0, [8.883918591250191, 40.285846844592555]], 
[0, [1.3980540898103984, 7.34254653337652]], [0, [0.20029695125356592, 0.6723351335298008]]], + 'Am': [[0, [5811.506710991388, 397.9252363037504]], [0, [399.6133434666023, 189.52305406251924]], [0, [51.42829601635813, 95.1840529019412]], [0, [9.11898176976415, 41.23292843473057]], [0, [1.428182916482645, 7.698670102009173]], [0, [0.20721869574024737, 0.6944251274901461]]], + 'Cm': [[0, [6038.636135983948, 409.54741384643347]], [0, [412.3835587172814, 193.7348284033143]], [0, [52.81383174045898, 97.70910187250404]], [0, [9.34982889512178, 42.170969011372755]], [0, [1.4578869343677483, 8.06893586629329]], [0, [0.21346404640287797, 0.7119656673394167]]], + 'Bk': [[0, [6260.948816390237, 422.0410453374906]], [0, [422.91662308239006, 197.9666685432441]], [0, [53.90461251056316, 99.8974110849659]], [0, [9.602570532994145, 42.922482192514806]], [0, [1.4984353150383831, 8.5490189788099]], [0, [0.21896757528599817, 0.7251176961416461]]], + 'Cf': [[0, [6489.740134684498, 435.0218448995875]], [0, [433.34955871057997, 202.2541721796898]], [0, [54.94480645654533, 102.13481550166794]], [0, [9.83732955524915, 43.61885739445619]], [0, [1.5380653646173517, 9.027993242268069]], [0, [0.22430150549872171, 0.7368019584166986]]], + 'Es': [[0, [6726.1549694950845, 448.53297901385343]], [0, [443.7204453771275, 206.597327749617]], [0, [55.94404617815157, 104.4220836472625]], [0, [10.056118549065122, 44.26511722213098]], [0, [1.5773227998811425, 9.508944788748774]], [0, [0.22947752376909678, 0.7472304930317006]]], + 'Fm': [[0, [6970.136189873972, 462.57135978364494]], [0, [454.0434318759234, 210.99147916464239]], [0, [56.908789981374724, 106.75984221536478]], [0, [10.259912457405377, 44.864990614224354]], [0, [1.6164506247480044, 9.993218017831559]], [0, [0.23449400578056606, 0.7564937172127681]]], + 'Md': [[0, [7223.144242560901, 477.19533902849173]], [0, [464.3482819478374, 215.43979750119988]], [0, [57.84502682099961, 109.14985702691983]], [0, [10.449227089089558, 45.42214815564216]], 
[0, [1.6555508524664384, 10.481198923814517]], [0, [0.23935006172299833, 0.7646522895633704]]], + 'No': [[0, [7485.613396623628, 492.4146999962472]], [0, [474.6647744288757, 219.94073267865755]], [0, [58.76014532783936, 111.59422811133064]], [0, [10.624698106362063, 45.94171825601543]], [0, [1.6946873774494313, 10.973021567717673]], [0, [0.24404577984956427, 0.7717668700220324]]], + } diff --git a/qstack/spahm/__init__.py b/qstack/spahm/__init__.py index 4c1185e4..3a33238a 100644 --- a/qstack/spahm/__init__.py +++ b/qstack/spahm/__init__.py @@ -1,2 +1,3 @@ +"""SPAHM (spectrum of approximated Hamiltonian matrices representations) module.""" + from . import compute_spahm -#from . import rho diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 602895cb..900cf8b4 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -1,22 +1,31 @@ +"""Eigenvalue SPAHM computation.""" + import numpy as np from pyscf import scf, grad -from .guesses import solveF, get_guess, get_occ, get_dm, eigenvalue_grad, get_guess_g +from .guesses import solveF, get_guess, get_occ, eigenvalue_grad, get_guess_g def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): - """ Compute the guess Hamiltonian orbitals + """Compute MO energies and vectors using an initial guess Hamiltonian. Args: mol (pyscf Mole): pyscf Mole object. - guess (func): Method used to compute the guess Hamiltonian. Output of get_guess. - xc (str): Exchange-correlation functional. Defaults to pbe. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. - return_ao_dip (bool): if return computed AO dipole integrals + guess (callable or str): Guess Hamiltonian method function (from get_guess) or 'huckel'. + xc (str): Exchange-correlation functional name. Defaults to 'pbe'. + field (numpy ndarray, optional): 3-component uniform electric field vector (∇φ) in atomic units. + Defaults to None. 
+ return_ao_dip (bool): If True, also returns AO dipole integrals. Defaults to False. Returns: - 1D numpy array containing the eigenvalues - 2D numpy array containing the eigenvectors of the guess Hamiltonian. - (optional) 2D numpy array with the AO dipole integrals + tuple: Depending on return_ao_dip: + - If False: (e, v) where: + - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues. + - v (numpy ndarray): 2D array (nao, nao) of MO coefficients. + - If True: (e, v, ao_dip) where ao_dip is 3D array (3, nao, nao) of AO dipole integrals + if field is not None, else None. + + Raises: + NotImplementedError: If field is specified with Hückel guess. """ if guess == 'huckel': if field is not None: @@ -38,46 +47,58 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): def ext_field_generator(mol, field): - """ Generator for Hext (i.e. applied uniform electiric field interaction) gradient + """Generate external electric field Hamiltonian gradient function. + + Creates a function that computes derivatives of the external field interaction + Hamiltonian (H_ext) with respect to nuclear coordinates for each atom. Args: mol (pyscf Mole): pyscf Mole object. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + field (numpy ndarray or None): 3-component uniform electric field vector (∇φ) in atomic units. + If None, treated as zero field. Returns: - func(int: iat): returns the derivative of Hext wrt the coordinates of atom iat, i.e. dHext/dr[iat] + callable: Function field_deriv(iat) that takes atom index and returns + 3D array (3, nao, nao) of dH_ext/dr[iat] - external field Hamiltonian + gradient for atom iat. 
""" - shls_slice = (0, mol.nbas, 0, mol.nbas) with mol.with_common_orig((0,0,0)): - int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) + int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) aoslices = mol.aoslice_by_atom()[:,2:] if field is None: field = (0,0,0) + def field_deriv(iat): p0, p1 = aoslices[iat] dmu_dr = np.zeros_like(int1e_irp) # dim(mu)×dim(r)×nao×nao - dmu_dr[:,:,p0:p1,:] -= int1e_irp[:,:,:,p0:p1].transpose((0,1,3,2)) # TODO not sure why minus - dmu_dr[:,:,:,p0:p1] -= int1e_irp[:,:,:,p0:p1] # TODO check/fix E definition + dmu_dr[:,:,p0:p1,:] -= int1e_irp[:,:,:,p0:p1].transpose((0,1,3,2)) + dmu_dr[:,:,:,p0:p1] -= int1e_irp[:,:,:,p0:p1] dhext_dr = np.einsum('x,xypq->ypq', field, dmu_dr) return dhext_dr return field_deriv def get_guess_orbitals_grad(mol, guess, field=None): - """ Compute the guess Hamiltonian eigenvalues and their derivatives + """Compute guess Hamiltonian eigenvalues and their nuclear/field gradients. + + Calculates orbital energies and their derivatives with respect to both nuclear + coordinates (for geometry optimization/force calculations) and electric field + (for response properties). Args: mol (pyscf Mole): pyscf Mole object. - guess (func): Tuple of methods used to compute the guess Hamiltonian and its eigenvalue derivatives. Output of get_guess_g - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + guess (tuple): Pair (hamiltonian_func, gradient_func) from get_guess_g(). + field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - numpy 1d array (mol.nao,): eigenvalues - numpy 3d ndarray (mol.nao,mol.natm,3): gradient of the eigenvalues in Eh/bohr - numpy 2d ndarray (mol.nao,3): derivative of the eigenvalues wrt field in Eh/a.u. 
+ tuple: (e, de_dr, de_dfield) where: + - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues in Eh. + - de_dr (numpy ndarray): 3D array (nao, natm, 3) of eigenvalue gradients in Eh/bohr. + - de_dfield (numpy ndarray or None): 2D array (nao, 3) of eigenvalue derivatives + w.r.t. electric field in Eh/a.u., or None if field is None. """ - e, c, ao_dip = get_guess_orbitals(mol, guess[0], field=field, return_ao_dip=True) mf = grad.rhf.Gradients(scf.RHF(mol)) s1 = mf.get_ovlp(mol) @@ -93,33 +114,25 @@ def get_guess_orbitals_grad(mol, guess, field=None): return e, eigenvalue_grad(mol, e, c, s1, h1), de_dfield -def get_guess_dm(mol, guess, xc="pbe", openshell=None, field=None): - """ Compute the density matrix with the guess Hamiltonian. - - Args: - mol (pyscf Mole): pyscf Mole object. - guess (func): Method used to compute the guess Hamiltonian. Output of get_guess. - xc (str): Exchange-correlation functional. Defaults to pbe - openshell (bool): . Defaults to None. - - Returns: - A numpy ndarray containing the density matrix computed using the guess Hamiltonian. - """ - _e, v = get_guess_orbitals(mol, guess, xc, field=field) - return get_dm(v, mol.nelec, mol.spin if mol.spin>0 or openshell is not None else None) - - def get_spahm_representation(mol, guess_in, xc="pbe", field=None): - """ Compute the SPAHM representation. + """Compute the ε-SPAHM molecular representation. + + Reference: + A. Fabrizio, K. R. Briling, C. Corminboeuf, + "SPAHM: the spectrum of approximated Hamiltonian matrices representations", + Digital Discovery 1 286-294 (2022), doi:10.1039/d1dd00050k. Args: mol (pyscf Mole): pyscf Mole object. - guess_in (str): Method used to obtain the guess Hamiltoninan. - xc (str): Exchange-correlation functional. Defaults to pbe. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + guess_in (str): Guess method name (e.g., 'LB', 'SAD', 'core', 'GWH'). + xc (str): Exchange-correlation functional name. Defaults to 'pbe'. 
+ field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - A numpy ndarray containing the SPAHM representation. + numpy ndarray: SPAHM representation consisting of occupied orbital eigenvalues. + - Closed-shell: 1D array of shape (n_occupied,) in Eh. + - Open-shell: 2D array of shape (2, n_alpha) for alpha and beta orbitals (padded by zeros). """ guess = get_guess(guess_in) e, _v = get_guess_orbitals(mol, guess, xc, field=field) @@ -128,17 +141,25 @@ def get_spahm_representation(mol, guess_in, xc="pbe", field=None): def get_spahm_representation_grad(mol, guess_in, field=None): - """ Compute the SPAHM representation and its gradient + """Compute SPAHM representation and its nuclear/field gradients for force/response calculations. + + Calculates the SPAHM descriptor (occupied orbital energies) along with derivatives + needed for molecular dynamics, geometry optimization, and response properties. Args: mol (pyscf Mole): pyscf Mole object. - guess_in (str): Method used to obtain the guess Hamiltoninan. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + guess_in (str): Guess method name with gradient support ('core' or 'lb'). + field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - numpy 1d array (occ,): the SPAHM representation (Eh). - numpy 3d array (occ,mol.natm,3): gradient of the representation (Eh/bohr) - numpy 2d array (occ,3): gradient of the representation wrt electric field (Eh/a.u.) + tuple: (spahm, spahm_grad, spahm_field_grad) where: + - spahm (numpy ndarray): SPAHM representation - occupied orbital energies in Eh. + Shape: (n_occ,) for closed-shell or (2, n_alpha) for open-shell. + - spahm_grad (numpy ndarray): Nuclear gradients of SPAHM in Eh/bohr. + Shape: (n_occ, natm, 3) or (2, n_alpha, natm, 3). + - spahm_field_grad (numpy ndarray or None): Electric field gradients in Eh/a.u. 
+ Shape: (n_occ, 3) or (2, n_alpha, 3), or None if field is None. """ guess = get_guess_g(guess_in) e, agrad, fgrad = get_guess_orbitals_grad(mol, guess, field=field) diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index 9e6e2763..3314d9a7 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -1,3 +1,11 @@ +"""Initial guess Hamiltonian methods for SPAHM. + +Implements various guess methods: Hcore, Hückel, GWH, SAD, SAP, LB2020. + +Provides: + guesses_dict: Dictionary mapping guess names to functions. +""" + import warnings import numpy as np import scipy @@ -6,127 +14,173 @@ def hcore(mol, *_): - """Uses the core potential (kin + nuc + ecp) to compute the guess Hamiltonian. + """Compute guess Hamiltonian from core contributions (kinetic + nuclear + ECP). - Args: - mol (pyscf Mole): pyscf Mole object. + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D array containing the core Hamiltonian matrix in AO basis. + """ + return scf.hf.get_hcore(mol) - Returns: - A numpy ndarray containing the computed approximate Hamiltonian. - """ - return scf.hf.get_hcore(mol) def GWH(mol, *_): - """Uses the generalized Wolfsberg-Helmholtz to compute the guess Hamiltonian. - - Args: - mol (pyscf Mole): pyscf Mole object. - - Returns: - A numpy ndarray containing the computed approximate Hamiltonian. - """ - h = hcore(mol) - S = mol.intor_symmetric('int1e_ovlp') - K = 1.75 # See J. Chem. Phys. 1952, 20, 837 - h_gwh = np.zeros_like(h) - for i in range(h.shape[0]): - for j in range(h.shape[1]): - if i != j: - h_gwh[i,j] = 0.5 * K * (h[i,i] + h[j,j]) * S[i,j] - else: - h_gwh[i,j] = h[i,i] - return h_gwh - -def SAD(mol, func): - """Uses the superposition of atomic densities to compute the guess Hamiltonian. - - Args: - mol (pyscf Mole): pyscf Mole object. - func (str): Exchange-correlation functional. 
- - Returns: - A numpy ndarray containing the computed approximate Hamiltonian. - """ - hc = hcore(mol) - dm = scf.hf.init_guess_by_atom(mol) - mf = dft.RKS(mol) - mf.xc = func - vhf = mf.get_veff(dm=dm) - if vhf.ndim == 2: - fock = hc + vhf - else: - fock = hc + vhf[0] - if not np.array_equal(vhf[0], vhf[1]): - msg = f'The effective potential ({func}) return different alpha and beta matrix components from atomicHF DM' - warnings.warn(msg, RuntimeWarning, stacklevel=2) - return fock + """Compute guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. + + Uses the formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij with K = 1.75. + + Reference: + M. Wolfsberg, L. Helmholtz, + "The spectra and electronic structure of the tetrahedral ions MnO4-, CrO4--, and ClO4-", + J. Chem. Phys. 20 837-843 (1952), doi:10.1063/1.1700580. + + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D GWH Hamiltonian matrix in AO basis. + """ + h = hcore(mol).diagonal() + S = mol.intor_symmetric('int1e_ovlp') + K = 1.75 + h_gwh = 0.5 * K * (h[:,None] + h[None,:]) * S + h_gwh[np.diag_indices_from(h_gwh)] = h + return h_gwh + + +def SAD(mol, xc): + """Compute guess Hamiltonian using Superposition of Atomic Densities (SAD). + + Constructs the Fock matrix from atomic Hartree-Fock density matrices + summed together as an initial guess for molecular calculations. + + Args: + mol (pyscf Mole): pyscf Mole object. + xc (str): Exchange-correlation functional. + + Returns: + numpy ndarray: 2D Fock matrix in AO basis computed from SAD. + + Warns: + RuntimeWarning: If alpha and beta effective potentials differ for the functional. 
+ """ + hc = hcore(mol) + dm = scf.hf.init_guess_by_atom(mol) + mf = dft.RKS(mol) + mf.xc = xc + vhf = mf.get_veff(dm=dm) + if vhf.ndim == 2: + fock = hc + vhf + else: + fock = hc + vhf[0] + if not np.array_equal(vhf[0], vhf[1]): + msg = f'The effective potential ({xc}) returned different alpha and beta matrix components from atomicHF DM' + warnings.warn(msg, RuntimeWarning, stacklevel=2) + return fock + def SAP(mol, *_): - """Uses the superposition of atomic potentials to compute the guess Hamiltonian. + """Compute guess Hamiltonian using Superposition of Atomic Potentials (SAP). - Args: - mol (pyscf Mole): pyscf Mole object. + Constructs initial Hamiltonian from kinetic energy plus summed atomic potentials. + + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D Hamiltonian matrix (T + V_SAP) in AO basis. + """ + mf = dft.RKS(mol) + vsap = mf.get_vsap() + t = mol.intor_symmetric('int1e_kin') + fock = t + vsap + return fock - Returns: - A numpy ndarray containing the computed approximate Hamiltonian. - """ - mf = dft.RKS(mol) - vsap = mf.get_vsap() - t = mol.intor_symmetric('int1e_kin') - fock = t + vsap - return fock def LB(mol, *_): - """Uses the Laikov-Briling model with HF-based parameters to compute the guess Hamiltonian. + """Compute guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. - Args: - mol (pyscf Mole): pyscf Mole object. + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D effective Hamiltonian matrix from LB2020 model in AO basis. + """ + return LB20(parameters='HF').Heff(mol) - Returns: - A numpy ndarray containing the computed approximate Hamiltonian. - """ - return LB20(parameters='HF').Heff(mol) def LB_HFS(mol, *_): - """ Laikov-Briling using HFS-based parameters + """Compute guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. 
- Args: - mol (pyscf Mole): pyscf Mole object. + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D effective Hamiltonian matrix from LB2020-HFS model in AO basis. + """ + return LB20(parameters='HFS').Heff(mol) - Returns: - A numpy ndarray containing the computed approximate Hamiltonian. - """ - return LB20(parameters='HFS').Heff(mol) def solveF(mol, fock): - """Computes the eigenvalues and eigenvectors corresponding to the given Hamiltonian. + """Solves generalized eigenvalue problem FC = SCε for the Fock/Hamiltonian matrix. + + Args: + mol (pyscf Mole): pyscf Mole object. + fock (numpy ndarray): 2D Fock or Hamiltonian matrix in AO basis. - Args: - mol (pyscf Mole): pyscf Mole object. - fock (numpy ndarray): Approximate Hamiltonian. - """ - s1e = mol.intor_symmetric('int1e_ovlp') - return scipy.linalg.eigh(fock, s1e) + Returns: + tuple: (eigenvalues, eigenvectors) where: + - eigenvalues: 1D array of orbital energies + - eigenvectors: 2D array of MO coefficients (columns are MOs) + """ + s1e = mol.intor_symmetric('int1e_ovlp') + return scipy.linalg.eigh(fock, s1e) def get_guess(arg): - """Returns the function of the method selected to compute the approximate hamiltoninan + """Return guess Hamiltonian function by name. - Args: - arg (str): Approximate Hamiltonian + Args: + arg (str): Guess method name. Available options: + - 'core': Core Hamiltonian (H_core). + - 'sad': Superposition of Atomic Densities. + - 'sap': Superposition of Atomic Potentials. + - 'gwh': Generalized Wolfsberg-Helmholtz. + - 'lb': Laikov-Briling 2020 (HF parameters). + - 'lb-hfs': Laikov-Briling 2020 (HFS parameters). + - 'huckel': Extended Hückel method. + + Returns: + callable: Guess Hamiltonian function with signature f(mol, xc) -> numpy.ndarray. - Returns: - The function of the selected method. - """ - arg = arg.lower() - if arg not in guesses_dict: - raise RuntimeError(f'Unknown guess. 
Available guesses: {list(guesses_dict.keys())}') - return guesses_dict[arg] + Raises: + RuntimeError: If the specified guess method is not available. + """ + arg = arg.lower() + if arg not in guesses_dict: + raise RuntimeError(f'Unknown guess. Available guesses: {list(guesses_dict.keys())}') + return guesses_dict[arg] def check_nelec(nelec, nao): - """ Checks if there is enough orbitals - for the electrons""" + """Validate that the number of electrons can be accommodated by available orbitals. + + Args: + nelec (tuple or int): Number of electrons (alpha, beta) or total. + nao (int): Number of atomic orbitals. + + Raises: + RuntimeError: If there are more electrons than available orbitals. + + Warns: + RuntimeWarning: If all orbitals are filled (complete shell warning). + """ if np.any(np.array(nelec) > nao): raise RuntimeError(f'Too many electrons ({nelec}) for {nao} orbitals') elif np.any(np.array(nelec) == nao): @@ -135,15 +189,18 @@ def check_nelec(nelec, nao): def get_occ(e, nelec, spin): - """Returns the occupied subset of e + """Extract occupied orbital eigenvalues/energies. Args: - e (numpy ndarray): Energy eigenvalues. - nelec(tuple): Number of alpha and beta electrons. - spin(int): Spin. + e (numpy ndarray): Full array of orbital eigenvalues (1D) + or possibly arrays of larger dimensionality. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Spin multiplicity. If None, assumes closed-shell. Returns: - A numpy ndarray containing the occupied eigenvalues. + numpy ndarray: Occupied eigenvalues. Shape depends on spin: + - Closed-shell (spin=None): 1D array of occupied eigenvalues + - Open-shell: 2D array (2, nocc) for alpha and beta separately """ check_nelec(nelec, e.shape[0]) if spin is None: @@ -151,68 +208,101 @@ def get_occ(e, nelec, spin): return e[:nocc,...] else: nocc = nelec - e1 = np.zeros((2, *e.shape))[:,:nocc[0],...] + e1 = np.zeros((2, nocc[0], *e.shape[1:])) e1[0,:nocc[0],...] = e[:nocc[0],...] e1[1,:nocc[1],...] 
= e[:nocc[1],...] return e1 def get_dm(v, nelec, spin): - """Computes the density matrix. - - Args: - v (numpy ndarray): Eigenvectors of a previously solve Hamiltoinan. - nelec(tuple): Number of alpha and beta electrons. - spin(int): Spin. - - Return: - A numpy ndarray containing the density matrix computed using the guess Hamiltonian. - """ - - check_nelec(nelec, len(v)) - if spin is None: - nocc = nelec[0] - dm = v[:,:nocc] @ v[:,:nocc].T - return 2.0*dm - else: - nocc = nelec - dm0 = v[:,:nocc[0]] @ v[:,:nocc[0]].T - dm1 = v[:,:nocc[1]] @ v[:,:nocc[1]].T - return np.array((dm0,dm1)) - -############################################################################### + """Construct density matrix from occupied molecular orbitals. + + Args: + v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Spin multiplicity. If None, assumes closed-shell (RHF). + + Returns: + numpy ndarray: Density matrix in AO basis. + - Closed-shell: 2D array (nao, nao) + - Open-shell: 3D array (2, nao, nao) for alpha and beta + """ + check_nelec(nelec, len(v)) + if spin is None: + nocc = nelec[0] + dm = v[:,:nocc] @ v[:,:nocc].T + return 2.0*dm + else: + nocc = nelec + dm0 = v[:,:nocc[0]] @ v[:,:nocc[0]].T + dm1 = v[:,:nocc[1]] @ v[:,:nocc[1]].T + return np.array((dm0,dm1)) + def hcore_grad(mf): + """Return core Hamiltonian gradient generator function. + + Args: + mf: PySCF mean-field object. + + Returns: + callable: Function that returns core Hamiltonian gradient for a given atom. + """ return mf.hcore_generator(mf.mol) + def LB_grad(mf): + """Return Laikov-Briling Hamiltonian gradient generator function. + + Combines core Hamiltonian gradient with LB2020 model gradient. + + Args: + mf: Mean-field object with hcore_generator method. + + Returns: + callable: Function that returns total Hamiltonian gradient for a given atom. 
+ """ hcore_grad = mf.hcore_generator(mf.mol) HLB_grad = LB20().HLB20_generator(mf.mol) + def H_grad(iat): return hcore_grad(iat) + HLB_grad(iat) return H_grad + def get_guess_g(arg): + """Return both guess Hamiltonian function and its gradient generator. + + Args: + arg (str): Guess method name. Available: 'core', 'lb'. + + Returns: + tuple: (hamiltonian_function, gradient_function) pair. + + Raises: + RuntimeError: If the specified guess method is not available for gradients. + """ arg = arg.lower() guesses = {'core':(hcore, hcore_grad), 'lb':(LB, LB_grad)} if arg not in guesses: raise RuntimeError(f'Unknown guess. Available guesses: {list(guesses.keys())}') return guesses[arg] + def eigenvalue_grad(mol, e, c, s1, h1): + """Compute nuclear gradients of orbital eigenvalues from generalized eigenvalue problem HC = eSC. - """Compute gradients of eigenvalues found from HC=eSC + Uses the Hellmann-Feynman theorem for eigenvalue derivatives. Args: - mol (pyscf Mole): pyscf Mole object - e (numpy 1d ndarray, mol.nao): eigenvalues - c (numpy 2d ndarray, mol.nao*mol.nao): eigenvectors - s1 (numpy 3d ndarray, 3*mol.nao*mol.nao): compact gradient of the overlap matrix [-(nabla \\|\\)] - h1 (func(int: iat)): returns the derivative of H wrt the coordinates of atom iat, i.e. dH/dr[iat] + mol (pyscf Mole): pyscf Mole object. + e (numpy ndarray): 1D array (nao,) of orbital eigenvalues. + c (numpy ndarray): 2D array (nao, nao) of MO coefficients (eigenvectors). + s1 (numpy ndarray): 3D array (3, nao, nao) - gradient of overlap matrix. + h1 (callable): Function returning dH/dr[iat] - Hamiltonian gradient for atom iat. Returns: - numpy 3d ndarray, mol.nao*mol.natm*3: gradient of the eigenvalues in Eh/bohr - + numpy ndarray: 3D array (nao, natm, 3) of eigenvalue gradients in Eh/bohr. 
""" de_dr = np.zeros((mol.nao, mol.natm, 3)) aoslices = mol.aoslice_by_atom()[:,2:] diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index 0bfff6d3..755c7585 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -1,252 +1,317 @@ +"""Wigner d-matrices for real spherical harmonics to symmetrize coefficient vectors.""" + import numpy as np from numpy import sqrt +from qstack.tools import Cursor + def c_split(mol, c): -# works for an uncontracted basis only - cs = [] - i0 = 0 - for at in mol.aoslice_by_atom(): - for b in range(at[0], at[1]): - l = mol.bas_angular(b) - msize = 2*l+1 - cs.append([l, c[i0:i0+msize]]) - i0 += msize - return cs + """Split coefficient vector by angular momentum quantum number for each atom. + + Organizes expansion coefficients into sublists grouped by angular momentum (l) + for each atomic basis function. + + Args: + mol (pyscf Mole): pyscf Mole object. + c (numpy ndarray): 1D array of expansion coefficients. + + Returns: + list: List of [l, coefficients] pairs where l is angular momentum and + coefficients is the subset of c for that angular momentum shell. + """ + cs = [] + slicer = Cursor(inc=lambda l: 2*l+1, action='slicer') + for at0, at1 in mol.aoslice_by_atom()[:,:2]: + for b in range(at0, at1): + l = mol.bas_angular(b) + cs.extend([[l, c[slicer(l)]] for _n in range(mol.bas_nctr(b))]) + return cs + def rotate_c(D, cs): - c_new = [] - for l,ci in cs: - ci_new = D[l] @ ci - c_new.append(ci_new) - return np.hstack(c_new) + """Rotate coefficient vector using real Wigner D-matrices. + + Applies angular momentum rotation to each angular momentum block separately. + + Args: + D (list): List of Wigner D-matrices (numpy ndarray of shape (2l+1,(2l+1) indexed by angular momentum l. + cs (list): List of [l, coefficients] pairs from c_split(). + + Returns: + numpy ndarray: 1D array of rotated coefficients. 
+ """ + return np.hstack([D[l] @ ci for l, ci in cs]) + def new_xy_axis(z): - # finds the "optimal" axes x' and y' from z' - z = z/np.linalg.norm(z) # don't use /= so a copy of z is created - i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z - x = -z[i] * z - x[i] += 1.0 # create a vector orthogonal to z with dominant component i - x /= np.sqrt(1.0-z[i]*z[i]) # normalize - y = np.cross(z,x) - return np.array([x,y,z]) + """Construct orthonormal coordinate system from a given z-axis. + + Finds optimal x' and y' axes that form a right-handed orthonormal system + with the given z' direction. The algorithm chooses x' to have maximal + component along the original axis with minimal projection onto z'. + + Args: + z (numpy ndarray): 3D vector defining the new z-axis direction. + + Returns: + numpy ndarray: 3x3 rotation matrix with rows [x', y', z'] defining the + new orthonormal coordinate system. + """ + z = z/np.linalg.norm(z) # don't use /= so a copy of z is created + i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z + x = -z[i] * z + x[i] += 1.0 # create a vector orthogonal to z with dominant component i + x /= np.sqrt(1.0-z[i]*z[i]) # normalize + y = np.cross(z,x) + return np.array([x,y,z]) def Dmatrix(xyz, lmax, order='xyz'): - # generate Wigner D-matrices D[l][m1,m2] = D_{m1,m2}^l - # for a rotation encoded as x'=xyz[0], y'=xyz[1], z'=xyz[2] - # (m1 is rotated so D is transposed) - - xx = xyz[0,0]; xy = xyz[0,1]; xz = xyz[0,2] - yx = xyz[1,0]; yy = xyz[1,1]; yz = xyz[1,2] - zx = xyz[2,0]; zy = xyz[2,1]; zz = xyz[2,2] - - SQRT3 = sqrt(3.0) - - D = [np.zeros((2*l+1,2*l+1)) for l in range(lmax+1)] - - D[0][0,0] = 1.0 - - if lmax < 1: - return D - - l=1 - if order=='yzx': # -1 0 1 - D[1][l+ -1,l+ -1] = yy - D[1][l+ -1,l+ 0] = yz - D[1][l+ -1,l+ 1] = yx - D[1][l+ 0,l+ -1] = zy - D[1][l+ 0,l+ 0] = zz - D[1][l+ 0,l+ 1] = zx - D[1][l+ 1,l+ -1] = xy - D[1][l+ 1,l+ 0] = xz - D[1][l+ 1,l+ 1] = xx - elif 
order=='xyz': # 1 -1 0 - D[1][ 0, 0] = xx - D[1][ 0, 1] = xy - D[1][ 0, 2] = xz - D[1][ 1, 0] = yx - D[1][ 1, 1] = yy - D[1][ 1, 2] = yz - D[1][ 2, 0] = zx - D[1][ 2, 1] = zy - D[1][ 2, 2] = zz - - if lmax < 2: - return D - - l=2 - D[2][l+ -2,l+ -2] = xx*yy+xy*yx - D[2][l+ -2,l+ -1] = xy*yz+xz*yy - D[2][l+ -2,l+ 0] = xz*yz * SQRT3 - D[2][l+ -2,l+ 1] = xx*yz+xz*yx - D[2][l+ -2,l+ 2] = xx*yx-xy*yy - D[2][l+ -1,l+ -2] = yx*zy+yy*zx - D[2][l+ -1,l+ -1] = yy*zz+yz*zy - D[2][l+ -1,l+ 0] = yz*zz * SQRT3 - D[2][l+ -1,l+ 1] = yx*zz+yz*zx - D[2][l+ -1,l+ 2] = yx*zx-yy*zy - D[2][l+ 0,l+ -2] = zx*zy * SQRT3 - D[2][l+ 0,l+ -1] = zy*zz * SQRT3 - D[2][l+ 0,l+ 0] = 1.5*zz*zz - 0.5 - D[2][l+ 0,l+ 1] = zx*zz * SQRT3 - D[2][l+ 0,l+ 2] = (zx*zx-zy*zy) * 0.5 * SQRT3 - D[2][l+ 1,l+ -2] = xx*zy+xy*zx - D[2][l+ 1,l+ -1] = xy*zz+xz*zy - D[2][l+ 1,l+ 0] = xz*zz * SQRT3 - D[2][l+ 1,l+ 1] = xx*zz+xz*zx - D[2][l+ 1,l+ 2] = xx*zx-xy*zy - D[2][l+ 2,l+ -2] = xx*xy-yx*yy - D[2][l+ 2,l+ -1] = xy*xz-yy*yz - D[2][l+ 2,l+ 0] = (xz*xz-yz*yz) * 0.5 * SQRT3 - D[2][l+ 2,l+ 1] = xx*xz-yx*yz - D[2][l+ 2,l+ 2] = (xx*xx-xy*xy+yy*yy-yx*yx) * 0.5 - - if lmax < 3: - return D - - l=3 - D[3][l+ -3,l+ -3] = 3*xx**2*yy/4 + 3*xx*xy*yx/2 - 3*xy**2*yy/4 - 3*yx**2*yy/4 + yy**3/4 - D[3][l+ -3,l+ -2] = sqrt(6)*(xx*xy*yz + xx*xz*yy + xy*xz*yx - yx*yy*yz)/2 - D[3][l+ -3,l+ -1] = sqrt(15)*(-xx**2*yy - 2*xx*xy*yx - 3*xy**2*yy + 8*xy*xz*yz + 4*xz**2*yy + yx**2*yy + yy**3 - 4*yy*yz**2)/20 - D[3][l+ -3,l+ 0] = sqrt(10)*(-3*xx**2*yz - 6*xx*xz*yx - 3*xy**2*yz - 6*xy*xz*yy + 6*xz**2*yz + 3*yx**2*yz + 3*yy**2*yz - 2*yz**3)/20 - D[3][l+ -3,l+ 1] = sqrt(15)*(-3*xx**2*yx - 2*xx*xy*yy + 8*xx*xz*yz - xy**2*yx + 4*xz**2*yx + yx**3 + yx*yy**2 - 4*yx*yz**2)/20 - D[3][l+ -3,l+ 2] = sqrt(6)*(xx**2*yz + 2*xx*xz*yx - xy**2*yz - 2*xy*xz*yy - yx**2*yz + yy**2*yz)/4 - D[3][l+ -3,l+ 3] = 3*xx**2*yx/4 - 3*xx*xy*yy/2 - 3*xy**2*yx/4 - yx**3/4 + 3*yx*yy**2/4 - D[3][l+ -2,l+ -3] = sqrt(6)*(xx*yx*zy + xx*yy*zx + xy*yx*zx - xy*yy*zy)/2 - D[3][l+ -2,l+ -2] 
= xx*yy*zz + xx*yz*zy + xy*yx*zz + xy*yz*zx + xz*yx*zy + xz*yy*zx - D[3][l+ -2,l+ -1] = sqrt(10)*(-xx*yx*zy - xx*yy*zx - xy*yx*zx - 3*xy*yy*zy + 4*xy*yz*zz + 4*xz*yy*zz + 4*xz*yz*zy)/10 - D[3][l+ -2,l+ 0] = sqrt(15)*(-xx*yx*zz - xx*yz*zx - xy*yy*zz - xy*yz*zy - xz*yx*zx - xz*yy*zy + 2*xz*yz*zz)/5 - D[3][l+ -2,l+ 1] = sqrt(10)*(-3*xx*yx*zx - xx*yy*zy + 4*xx*yz*zz - xy*yx*zy - xy*yy*zx + 4*xz*yx*zz + 4*xz*yz*zx)/10 - D[3][l+ -2,l+ 2] = xx*yx*zz + xx*yz*zx - xy*yy*zz - xy*yz*zy + xz*yx*zx - xz*yy*zy - D[3][l+ -2,l+ 3] = sqrt(6)*(xx*yx*zx - xx*yy*zy - xy*yx*zy - xy*yy*zx)/2 - D[3][l+ -1,l+ -3] = sqrt(15)*(2*yx*zx*zy + yy*zx**2 - yy*zy**2)/4 - D[3][l+ -1,l+ -2] = sqrt(10)*(yx*zy*zz + yy*zx*zz + yz*zx*zy)/2 - D[3][l+ -1,l+ -1] = -yx*zx*zy/2 - yy*zx**2/4 - 3*yy*zy**2/4 + yy*zz**2 + 2*yz*zy*zz - D[3][l+ -1,l+ 0] = sqrt(6)*(-2*yx*zx*zz - 2*yy*zy*zz - yz*zx**2 - yz*zy**2 + 2*yz*zz**2)/4 - D[3][l+ -1,l+ 1] = -3*yx*zx**2/4 - yx*zy**2/4 + yx*zz**2 - yy*zx*zy/2 + 2*yz*zx*zz - D[3][l+ -1,l+ 2] = sqrt(10)*(2*yx*zx*zz - 2*yy*zy*zz + yz*zx**2 - yz*zy**2)/4 - D[3][l+ -1,l+ 3] = sqrt(15)*(yx*zx**2 - yx*zy**2 - 2*yy*zx*zy)/4 - D[3][l+ 0,l+ -3] = sqrt(10)*zy*(3*zx**2 - zy**2)/4 - D[3][l+ 0,l+ -2] = sqrt(15)*zx*zy*zz - D[3][l+ 0,l+ -1] = sqrt(6)*zy*(5*zz**2 - 1)/4 - D[3][l+ 0,l+ 0] = zz*(-3*zx**2 - 3*zy**2 + 2*zz**2)/2 - D[3][l+ 0,l+ 1] = sqrt(6)*zx*(5*zz**2 - 1)/4 - D[3][l+ 0,l+ 2] = sqrt(15)*zz*(zx - zy)*(zx + zy)/2 - D[3][l+ 0,l+ 3] = sqrt(10)*zx*(zx**2 - 3*zy**2)/4 - D[3][l+ 1,l+ -3] = sqrt(15)*(2*xx*zx*zy + xy*zx**2 - xy*zy**2)/4 - D[3][l+ 1,l+ -2] = sqrt(10)*(xx*zy*zz + xy*zx*zz + xz*zx*zy)/2 - D[3][l+ 1,l+ -1] = -xx*zx*zy/2 - xy*zx**2/4 - 3*xy*zy**2/4 + xy*zz**2 + 2*xz*zy*zz - D[3][l+ 1,l+ 0] = sqrt(6)*(-2*xx*zx*zz - 2*xy*zy*zz - xz*zx**2 - xz*zy**2 + 2*xz*zz**2)/4 - D[3][l+ 1,l+ 1] = -3*xx*zx**2/4 - xx*zy**2/4 + xx*zz**2 - xy*zx*zy/2 + 2*xz*zx*zz - D[3][l+ 1,l+ 2] = sqrt(10)*(2*xx*zx*zz - 2*xy*zy*zz + xz*zx**2 - xz*zy**2)/4 - D[3][l+ 1,l+ 3] = sqrt(15)*(xx*zx**2 - xx*zy**2 - 
2*xy*zx*zy)/4 - D[3][l+ 2,l+ -3] = sqrt(6)*(xx**2*zy + 2*xx*xy*zx - xy**2*zy - yx**2*zy - 2*yx*yy*zx + yy**2*zy)/4 - D[3][l+ 2,l+ -2] = xx*xy*zz + xx*xz*zy + xy*xz*zx - yx*yy*zz - yx*yz*zy - yy*yz*zx - D[3][l+ 2,l+ -1] = sqrt(10)*(-xx**2*zy - 2*xx*xy*zx - 3*xy**2*zy + 8*xy*xz*zz + 4*xz**2*zy + yx**2*zy + 2*yx*yy*zx + 3*yy**2*zy - 8*yy*yz*zz - 4*yz**2*zy)/20 - D[3][l+ 2,l+ 0] = sqrt(15)*(-xx**2*zz - 2*xx*xz*zx - xy**2*zz - 2*xy*xz*zy + 2*xz**2*zz + yx**2*zz + 2*yx*yz*zx + yy**2*zz + 2*yy*yz*zy - 2*yz**2*zz)/10 - D[3][l+ 2,l+ 1] = sqrt(10)*(-3*xx**2*zx - 2*xx*xy*zy + 8*xx*xz*zz - xy**2*zx + 4*xz**2*zx + 3*yx**2*zx + 2*yx*yy*zy - 8*yx*yz*zz + yy**2*zx - 4*yz**2*zx)/20 - D[3][l+ 2,l+ 2] = xx**2*zz/2 + xx*xz*zx - xy**2*zz/2 - xy*xz*zy - yx**2*zz/2 - yx*yz*zx + yy**2*zz/2 + yy*yz*zy - D[3][l+ 2,l+ 3] = sqrt(6)*(xx**2*zx - 2*xx*xy*zy - xy**2*zx - yx**2*zx + 2*yx*yy*zy + yy**2*zx)/4 - D[3][l+ 3,l+ -3] = 3*xx**2*xy/4 - 3*xx*yx*yy/2 - xy**3/4 - 3*xy*yx**2/4 + 3*xy*yy**2/4 - D[3][l+ 3,l+ -2] = sqrt(6)*(xx*xy*xz - xx*yy*yz - xy*yx*yz - xz*yx*yy)/2 - D[3][l+ 3,l+ -1] = sqrt(15)*(-xx**2*xy + 2*xx*yx*yy - xy**3 + 4*xy*xz**2 + xy*yx**2 + 3*xy*yy**2 - 4*xy*yz**2 - 8*xz*yy*yz)/20 - D[3][l+ 3,l+ 0] = sqrt(10)*(-3*xx**2*xz + 6*xx*yx*yz - 3*xy**2*xz + 6*xy*yy*yz + 2*xz**3 + 3*xz*yx**2 + 3*xz*yy**2 - 6*xz*yz**2)/20 - D[3][l+ 3,l+ 1] = sqrt(15)*(-xx**3 - xx*xy**2 + 4*xx*xz**2 + 3*xx*yx**2 + xx*yy**2 - 4*xx*yz**2 + 2*xy*yx*yy - 8*xz*yx*yz)/20 - D[3][l+ 3,l+ 2] = sqrt(6)*(xx**2*xz - 2*xx*yx*yz - xy**2*xz + 2*xy*yy*yz - xz*yx**2 + xz*yy**2)/4 - D[3][l+ 3,l+ 3] = xx**3/4 - 3*xx*xy**2/4 - 3*xx*yx**2/4 + 3*xx*yy**2/4 + 3*xy*yx*yy/2 - - if lmax < 4: - return D - - l=4 - D[4][l+ -4,l+-4] = xx**3*yy/2 + 3*xx**2*xy*yx/2 - 3*xx*xy**2*yy/2 - 3*xx*yx**2*yy/2 + xx*yy**3/2 - xy**3*yx/2 - xy*yx**3/2 + 3*xy*yx*yy**2/2 - D[4][l+ -4,l+-3] = sqrt(2)*(3*xx**2*xy*yz + 3*xx**2*xz*yy + 6*xx*xy*xz*yx - 6*xx*yx*yy*yz - xy**3*yz - 3*xy**2*xz*yy - 3*xy*yx**2*yz + 3*xy*yy**2*yz - 3*xz*yx**2*yy + xz*yy**3)/4 - 
D[4][l+ -4,l+-2] = sqrt(7)*(-xx**3*yy - 3*xx**2*xy*yx - 3*xx*xy**2*yy + 12*xx*xy*xz*yz + 6*xx*xz**2*yy + 3*xx*yx**2*yy + xx*yy**3 - 6*xx*yy*yz**2 - xy**3*yx + 6*xy*xz**2*yx + xy*yx**3 + 3*xy*yx*yy**2 - 6*xy*yx*yz**2 - 12*xz*yx*yy*yz)/14 - D[4][l+ -4,l+-1] = sqrt(14)*(-3*xx**2*xy*yz - 3*xx**2*xz*yy - 6*xx*xy*xz*yx + 6*xx*yx*yy*yz - 3*xy**3*yz - 9*xy**2*xz*yy + 12*xy*xz**2*yz + 3*xy*yx**2*yz + 9*xy*yy**2*yz - 4*xy*yz**3 + 4*xz**3*yy + 3*xz*yx**2*yy + 3*xz*yy**3 - 12*xz*yy*yz**2)/28 - D[4][l+ -4,l+ 0] = sqrt(35)*(3*xx**3*yx + 3*xx**2*xy*yy - 12*xx**2*xz*yz + 3*xx*xy**2*yx - 12*xx*xz**2*yx - 3*xx*yx**3 - 3*xx*yx*yy**2 + 12*xx*yx*yz**2 + 3*xy**3*yy - 12*xy**2*xz*yz - 12*xy*xz**2*yy - 3*xy*yx**2*yy - 3*xy*yy**3 + 12*xy*yy*yz**2 + 8*xz**3*yz + 12*xz*yx**2*yz + 12*xz*yy**2*yz - 8*xz*yz**3)/70 - D[4][l+ -4,l+ 1] = sqrt(14)*(-3*xx**3*yz - 9*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy + 12*xx*xz**2*yz + 9*xx*yx**2*yz + 3*xx*yy**2*yz - 4*xx*yz**3 - 3*xy**2*xz*yx + 6*xy*yx*yy*yz + 4*xz**3*yx + 3*xz*yx**3 + 3*xz*yx*yy**2 - 12*xz*yx*yz**2)/28 - D[4][l+ -4,l+ 2] = sqrt(7)*(-xx**3*yx + 3*xx**2*xz*yz + 3*xx*xz**2*yx + xx*yx**3 - 3*xx*yx*yz**2 + xy**3*yy - 3*xy**2*xz*yz - 3*xy*xz**2*yy - xy*yy**3 + 3*xy*yy*yz**2 - 3*xz*yx**2*yz + 3*xz*yy**2*yz)/7 - D[4][l+ -4,l+ 3] = sqrt(2)*(xx**3*yz + 3*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy - 3*xx*yx**2*yz + 3*xx*yy**2*yz - 3*xy**2*xz*yx + 6*xy*yx*yy*yz - xz*yx**3 + 3*xz*yx*yy**2)/4 - D[4][l+ -4,l+ 4] = xx**3*yx/2 - 3*xx**2*xy*yy/2 - 3*xx*xy**2*yx/2 - xx*yx**3/2 + 3*xx*yx*yy**2/2 + xy**3*yy/2 + 3*xy*yx**2*yy/2 - xy*yy**3/2 - D[4][l+ -3,l+-4] = sqrt(2)*(3*xx**2*yx*zy + 3*xx**2*yy*zx + 6*xx*xy*yx*zx - 6*xx*xy*yy*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx - yx**3*zy - 3*yx**2*yy*zx + 3*yx*yy**2*zy + yy**3*zx)/4 - D[4][l+ -3,l+-3] = 3*xx**2*yy*zz/4 + 3*xx**2*yz*zy/4 + 3*xx*xy*yx*zz/2 + 3*xx*xy*yz*zx/2 + 3*xx*xz*yx*zy/2 + 3*xx*xz*yy*zx/2 - 3*xy**2*yy*zz/4 - 3*xy**2*yz*zy/4 + 3*xy*xz*yx*zx/2 - 3*xy*xz*yy*zy/2 - 3*yx**2*yy*zz/4 - 3*yx**2*yz*zy/4 - 
3*yx*yy*yz*zx/2 + yy**3*zz/4 + 3*yy**2*yz*zy/4 - D[4][l+ -3,l+-2] = sqrt(14)*(-3*xx**2*yx*zy - 3*xx**2*yy*zx - 6*xx*xy*yx*zx - 6*xx*xy*yy*zy + 12*xx*xy*yz*zz + 12*xx*xz*yy*zz + 12*xx*xz*yz*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx + 12*xy*xz*yx*zz + 12*xy*xz*yz*zx + 6*xz**2*yx*zy + 6*xz**2*yy*zx + yx**3*zy + 3*yx**2*yy*zx + 3*yx*yy**2*zy - 12*yx*yy*yz*zz - 6*yx*yz**2*zy + yy**3*zx - 6*yy*yz**2*zx)/28 - D[4][l+ -3,l+-1] = sqrt(7)*(-3*xx**2*yy*zz - 3*xx**2*yz*zy - 6*xx*xy*yx*zz - 6*xx*xy*yz*zx - 6*xx*xz*yx*zy - 6*xx*xz*yy*zx - 9*xy**2*yy*zz - 9*xy**2*yz*zy - 6*xy*xz*yx*zx - 18*xy*xz*yy*zy + 24*xy*xz*yz*zz + 12*xz**2*yy*zz + 12*xz**2*yz*zy + 3*yx**2*yy*zz + 3*yx**2*yz*zy + 6*yx*yy*yz*zx + 3*yy**3*zz + 9*yy**2*yz*zy - 12*yy*yz**2*zz - 4*yz**3*zy)/28 - D[4][l+ -3,l+ 0] = sqrt(70)*(9*xx**2*yx*zx + 3*xx**2*yy*zy - 12*xx**2*yz*zz + 6*xx*xy*yx*zy + 6*xx*xy*yy*zx - 24*xx*xz*yx*zz - 24*xx*xz*yz*zx + 3*xy**2*yx*zx + 9*xy**2*yy*zy - 12*xy**2*yz*zz - 24*xy*xz*yy*zz - 24*xy*xz*yz*zy - 12*xz**2*yx*zx - 12*xz**2*yy*zy + 24*xz**2*yz*zz - 3*yx**3*zx - 3*yx**2*yy*zy + 12*yx**2*yz*zz - 3*yx*yy**2*zx + 12*yx*yz**2*zx - 3*yy**3*zy + 12*yy**2*yz*zz + 12*yy*yz**2*zy - 8*yz**3*zz)/140 - D[4][l+ -3,l+ 1] = sqrt(7)*(-9*xx**2*yx*zz - 9*xx**2*yz*zx - 6*xx*xy*yy*zz - 6*xx*xy*yz*zy - 18*xx*xz*yx*zx - 6*xx*xz*yy*zy + 24*xx*xz*yz*zz - 3*xy**2*yx*zz - 3*xy**2*yz*zx - 6*xy*xz*yx*zy - 6*xy*xz*yy*zx + 12*xz**2*yx*zz + 12*xz**2*yz*zx + 3*yx**3*zz + 9*yx**2*yz*zx + 3*yx*yy**2*zz + 6*yx*yy*yz*zy - 12*yx*yz**2*zz + 3*yy**2*yz*zx - 4*yz**3*zx)/28 - D[4][l+ -3,l+ 2] = sqrt(14)*(-3*xx**2*yx*zx + 3*xx**2*yz*zz + 6*xx*xz*yx*zz + 6*xx*xz*yz*zx + 3*xy**2*yy*zy - 3*xy**2*yz*zz - 6*xy*xz*yy*zz - 6*xy*xz*yz*zy + 3*xz**2*yx*zx - 3*xz**2*yy*zy + yx**3*zx - 3*yx**2*yz*zz - 3*yx*yz**2*zx - yy**3*zy + 3*yy**2*yz*zz + 3*yy*yz**2*zy)/14 - D[4][l+ -3,l+ 3] = 3*xx**2*yx*zz/4 + 3*xx**2*yz*zx/4 - 3*xx*xy*yy*zz/2 - 3*xx*xy*yz*zy/2 + 3*xx*xz*yx*zx/2 - 3*xx*xz*yy*zy/2 - 3*xy**2*yx*zz/4 - 3*xy**2*yz*zx/4 - 3*xy*xz*yx*zy/2 - 
3*xy*xz*yy*zx/2 - yx**3*zz/4 - 3*yx**2*yz*zx/4 + 3*yx*yy**2*zz/4 + 3*yx*yy*yz*zy/2 + 3*yy**2*yz*zx/4 - D[4][l+ -3,l+ 4] = sqrt(2)*(3*xx**2*yx*zx - 3*xx**2*yy*zy - 6*xx*xy*yx*zy - 6*xx*xy*yy*zx - 3*xy**2*yx*zx + 3*xy**2*yy*zy - yx**3*zx + 3*yx**2*yy*zy + 3*yx*yy**2*zx - yy**3*zy)/4 - D[4][l+ -2,l+-4] = sqrt(7)*(2*xx*yx*zx*zy + xx*yy*zx**2 - xx*yy*zy**2 + xy*yx*zx**2 - xy*yx*zy**2 - 2*xy*yy*zx*zy)/2 - D[4][l+ -2,l+-3] = sqrt(14)*(2*xx*yx*zy*zz + 2*xx*yy*zx*zz + 2*xx*yz*zx*zy + 2*xy*yx*zx*zz - 2*xy*yy*zy*zz + xy*yz*zx**2 - xy*yz*zy**2 + 2*xz*yx*zx*zy + xz*yy*zx**2 - xz*yy*zy**2)/4 - D[4][l+ -2,l+-2] = -xx*yx*zx*zy - xx*yy*zx**2/2 - xx*yy*zy**2/2 + xx*yy*zz**2 + 2*xx*yz*zy*zz - xy*yx*zx**2/2 - xy*yx*zy**2/2 + xy*yx*zz**2 - xy*yy*zx*zy + 2*xy*yz*zx*zz + 2*xz*yx*zy*zz + 2*xz*yy*zx*zz + 2*xz*yz*zx*zy - D[4][l+ -2,l+-1] = sqrt(2)*(-2*xx*yx*zy*zz - 2*xx*yy*zx*zz - 2*xx*yz*zx*zy - 2*xy*yx*zx*zz - 6*xy*yy*zy*zz - xy*yz*zx**2 - 3*xy*yz*zy**2 + 4*xy*yz*zz**2 - 2*xz*yx*zx*zy - xz*yy*zx**2 - 3*xz*yy*zy**2 + 4*xz*yy*zz**2 + 8*xz*yz*zy*zz)/4 - D[4][l+ -2,l+ 0] = sqrt(5)*(3*xx*yx*zx**2 + xx*yx*zy**2 - 4*xx*yx*zz**2 + 2*xx*yy*zx*zy - 8*xx*yz*zx*zz + 2*xy*yx*zx*zy + xy*yy*zx**2 + 3*xy*yy*zy**2 - 4*xy*yy*zz**2 - 8*xy*yz*zy*zz - 8*xz*yx*zx*zz - 8*xz*yy*zy*zz - 4*xz*yz*zx**2 - 4*xz*yz*zy**2 + 8*xz*yz*zz**2)/10 - D[4][l+ -2,l+ 1] = sqrt(2)*(-6*xx*yx*zx*zz - 2*xx*yy*zy*zz - 3*xx*yz*zx**2 - xx*yz*zy**2 + 4*xx*yz*zz**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy - 3*xz*yx*zx**2 - xz*yx*zy**2 + 4*xz*yx*zz**2 - 2*xz*yy*zx*zy + 8*xz*yz*zx*zz)/4 - D[4][l+ -2,l+ 2] = -xx*yx*zx**2 + xx*yx*zz**2 + 2*xx*yz*zx*zz + xy*yy*zy**2 - xy*yy*zz**2 - 2*xy*yz*zy*zz + 2*xz*yx*zx*zz - 2*xz*yy*zy*zz + xz*yz*zx**2 - xz*yz*zy**2 - D[4][l+ -2,l+ 3] = sqrt(14)*(2*xx*yx*zx*zz - 2*xx*yy*zy*zz + xx*yz*zx**2 - xx*yz*zy**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy + xz*yx*zx**2 - xz*yx*zy**2 - 2*xz*yy*zx*zy)/4 - D[4][l+ -2,l+ 4] = sqrt(7)*(xx*yx*zx**2 - xx*yx*zy**2 - 2*xx*yy*zx*zy - 2*xy*yx*zx*zy - xy*yy*zx**2 
+ xy*yy*zy**2)/2 - D[4][l+ -1,l+-4] = sqrt(14)*(3*yx*zx**2*zy - yx*zy**3 + yy*zx**3 - 3*yy*zx*zy**2)/4 - D[4][l+ -1,l+-3] = sqrt(7)*(6*yx*zx*zy*zz + 3*yy*zx**2*zz - 3*yy*zy**2*zz + 3*yz*zx**2*zy - yz*zy**3)/4 - D[4][l+ -1,l+-2] = sqrt(2)*(-3*yx*zx**2*zy - yx*zy**3 + 6*yx*zy*zz**2 - yy*zx**3 - 3*yy*zx*zy**2 + 6*yy*zx*zz**2 + 12*yz*zx*zy*zz)/4 - D[4][l+ -1,l+-1] = -3*yx*zx*zy*zz/2 - 3*yy*zx**2*zz/4 - 9*yy*zy**2*zz/4 + yy*zz**3 - 3*yz*zx**2*zy/4 - 3*yz*zy**3/4 + 3*yz*zy*zz**2 - D[4][l+ -1,l+ 0] = sqrt(10)*(3*yx*zx**3 + 3*yx*zx*zy**2 - 12*yx*zx*zz**2 + 3*yy*zx**2*zy + 3*yy*zy**3 - 12*yy*zy*zz**2 - 12*yz*zx**2*zz - 12*yz*zy**2*zz + 8*yz*zz**3)/20 - D[4][l+ -1,l+ 1] = -9*yx*zx**2*zz/4 - 3*yx*zy**2*zz/4 + yx*zz**3 - 3*yy*zx*zy*zz/2 - 3*yz*zx**3/4 - 3*yz*zx*zy**2/4 + 3*yz*zx*zz**2 - D[4][l+ -1,l+ 2] = sqrt(2)*(-yx*zx**3 + 3*yx*zx*zz**2 + yy*zy**3 - 3*yy*zy*zz**2 + 3*yz*zx**2*zz - 3*yz*zy**2*zz)/2 - D[4][l+ -1,l+ 3] = sqrt(7)*(3*yx*zx**2*zz - 3*yx*zy**2*zz - 6*yy*zx*zy*zz + yz*zx**3 - 3*yz*zx*zy**2)/4 - D[4][l+ -1,l+ 4] = sqrt(14)*(yx*zx**3 - 3*yx*zx*zy**2 - 3*yy*zx**2*zy + yy*zy**3)/4 - D[4][l+ 0,l+-4] = sqrt(35)*zx*zy*(zx - zy)*(zx + zy)/2 - D[4][l+ 0,l+-3] = sqrt(70)*zy*zz*(3*zx**2 - zy**2)/4 - D[4][l+ 0,l+-2] = sqrt(5)*zx*zy*(7*zz**2 - 1)/2 - D[4][l+ 0,l+-1] = sqrt(10)*zy*zz*(-3*zx**2 - 3*zy**2 + 4*zz**2)/4 - D[4][l+ 0,l+ 0] = 3*zx**4/8 + 3*zx**2*zy**2/4 - 3*zx**2*zz**2 + 3*zy**4/8 - 3*zy**2*zz**2 + zz**4 - D[4][l+ 0,l+ 1] = sqrt(10)*zx*zz*(-3*zx**2 - 3*zy**2 + 4*zz**2)/4 - D[4][l+ 0,l+ 2] = sqrt(5)*(zx - zy)*(zx + zy)*(7*zz**2 - 1)/4 - D[4][l+ 0,l+ 3] = sqrt(70)*zx*zz*(zx**2 - 3*zy**2)/4 - D[4][l+ 0,l+ 4] = sqrt(35)*(zx**4 - 6*zx**2*zy**2 + zy**4)/8 - D[4][l+ 1,l+-4] = sqrt(14)*(3*xx*zx**2*zy - xx*zy**3 + xy*zx**3 - 3*xy*zx*zy**2)/4 - D[4][l+ 1,l+-3] = sqrt(7)*(6*xx*zx*zy*zz + 3*xy*zx**2*zz - 3*xy*zy**2*zz + 3*xz*zx**2*zy - xz*zy**3)/4 - D[4][l+ 1,l+-2] = sqrt(2)*(-3*xx*zx**2*zy - xx*zy**3 + 6*xx*zy*zz**2 - xy*zx**3 - 3*xy*zx*zy**2 + 6*xy*zx*zz**2 + 12*xz*zx*zy*zz)/4 - 
D[4][l+ 1,l+-1] = -3*xx*zx*zy*zz/2 - 3*xy*zx**2*zz/4 - 9*xy*zy**2*zz/4 + xy*zz**3 - 3*xz*zx**2*zy/4 - 3*xz*zy**3/4 + 3*xz*zy*zz**2 - D[4][l+ 1,l+ 0] = sqrt(10)*(3*xx*zx**3 + 3*xx*zx*zy**2 - 12*xx*zx*zz**2 + 3*xy*zx**2*zy + 3*xy*zy**3 - 12*xy*zy*zz**2 - 12*xz*zx**2*zz - 12*xz*zy**2*zz + 8*xz*zz**3)/20 - D[4][l+ 1,l+ 1] = -9*xx*zx**2*zz/4 - 3*xx*zy**2*zz/4 + xx*zz**3 - 3*xy*zx*zy*zz/2 - 3*xz*zx**3/4 - 3*xz*zx*zy**2/4 + 3*xz*zx*zz**2 - D[4][l+ 1,l+ 2] = sqrt(2)*(-xx*zx**3 + 3*xx*zx*zz**2 + xy*zy**3 - 3*xy*zy*zz**2 + 3*xz*zx**2*zz - 3*xz*zy**2*zz)/2 - D[4][l+ 1,l+ 3] = sqrt(7)*(3*xx*zx**2*zz - 3*xx*zy**2*zz - 6*xy*zx*zy*zz + xz*zx**3 - 3*xz*zx*zy**2)/4 - D[4][l+ 1,l+ 4] = sqrt(14)*(xx*zx**3 - 3*xx*zx*zy**2 - 3*xy*zx**2*zy + xy*zy**3)/4 - D[4][l+ 2,l+-4] = sqrt(7)*(-xx**3*xy + 3*xx**2*zx*zy + xx*xy**3 + 3*xx*xy*zx**2 - 3*xx*xy*zy**2 - 3*xy**2*zx*zy + yx**3*yy - 3*yx**2*zx*zy - yx*yy**3 - 3*yx*yy*zx**2 + 3*yx*yy*zy**2 + 3*yy**2*zx*zy)/7 - D[4][l+ 2,l+-3] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*zy*zz + 6*xx*xy*zx*zz + 6*xx*xz*zx*zy + xy**3*xz - 3*xy**2*zy*zz + 3*xy*xz*zx**2 - 3*xy*xz*zy**2 + 3*yx**2*yy*yz - 3*yx**2*zy*zz - 6*yx*yy*zx*zz - 6*yx*yz*zx*zy - yy**3*yz + 3*yy**2*zy*zz - 3*yy*yz*zx**2 + 3*yy*yz*zy**2)/14 - D[4][l+ 2,l+-2] = xx**3*xy/7 - 3*xx**2*zx*zy/7 + xx*xy**3/7 - 6*xx*xy*xz**2/7 - 3*xx*xy*zx**2/7 - 3*xx*xy*zy**2/7 + 6*xx*xy*zz**2/7 + 12*xx*xz*zy*zz/7 - 3*xy**2*zx*zy/7 + 12*xy*xz*zx*zz/7 + 6*xz**2*zx*zy/7 - yx**3*yy/7 + 3*yx**2*zx*zy/7 - yx*yy**3/7 + 6*yx*yy*yz**2/7 + 3*yx*yy*zx**2/7 + 3*yx*yy*zy**2/7 - 6*yx*yy*zz**2/7 - 12*yx*yz*zy*zz/7 + 3*yy**2*zx*zy/7 - 12*yy*yz*zx*zz/7 - 6*yz**2*zx*zy/7 - D[4][l+ 2,l+-1] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*zy*zz - 6*xx*xy*zx*zz - 6*xx*xz*zx*zy + 3*xy**3*xz - 9*xy**2*zy*zz - 4*xy*xz**3 - 3*xy*xz*zx**2 - 9*xy*xz*zy**2 + 12*xy*xz*zz**2 + 12*xz**2*zy*zz - 3*yx**2*yy*yz + 3*yx**2*zy*zz + 6*yx*yy*zx*zz + 6*yx*yz*zx*zy - 3*yy**3*yz + 9*yy**2*zy*zz + 4*yy*yz**3 + 3*yy*yz*zx**2 + 9*yy*yz*zy**2 - 12*yy*yz*zz**2 - 12*yz**2*zy*zz)/14 - 
D[4][l+ 2,l+ 0] = sqrt(5)*(-3*xx**4 - 6*xx**2*xy**2 + 24*xx**2*xz**2 + 18*xx**2*zx**2 + 6*xx**2*zy**2 - 24*xx**2*zz**2 + 24*xx*xy*zx*zy - 96*xx*xz*zx*zz - 3*xy**4 + 24*xy**2*xz**2 + 6*xy**2*zx**2 + 18*xy**2*zy**2 - 24*xy**2*zz**2 - 96*xy*xz*zy*zz - 8*xz**4 - 24*xz**2*zx**2 - 24*xz**2*zy**2 + 48*xz**2*zz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 - 18*yx**2*zx**2 - 6*yx**2*zy**2 + 24*yx**2*zz**2 - 24*yx*yy*zx*zy + 96*yx*yz*zx*zz + 3*yy**4 - 24*yy**2*yz**2 - 6*yy**2*zx**2 - 18*yy**2*zy**2 + 24*yy**2*zz**2 + 96*yy*yz*zy*zz + 8*yz**4 + 24*yz**2*zx**2 + 24*yz**2*zy**2 - 48*yz**2*zz**2)/140 - D[4][l+ 2,l+ 1] = sqrt(2)*(3*xx**3*xz - 9*xx**2*zx*zz + 3*xx*xy**2*xz - 6*xx*xy*zy*zz - 4*xx*xz**3 - 9*xx*xz*zx**2 - 3*xx*xz*zy**2 + 12*xx*xz*zz**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + 12*xz**2*zx*zz - 3*yx**3*yz + 9*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz + 4*yx*yz**3 + 9*yx*yz*zx**2 + 3*yx*yz*zy**2 - 12*yx*yz*zz**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy - 12*yz**2*zx*zz)/14 - D[4][l+ 2,l+ 2] = xx**4/14 - 3*xx**2*xz**2/7 - 3*xx**2*zx**2/7 + 3*xx**2*zz**2/7 + 12*xx*xz*zx*zz/7 - xy**4/14 + 3*xy**2*xz**2/7 + 3*xy**2*zy**2/7 - 3*xy**2*zz**2/7 - 12*xy*xz*zy*zz/7 + 3*xz**2*zx**2/7 - 3*xz**2*zy**2/7 - yx**4/14 + 3*yx**2*yz**2/7 + 3*yx**2*zx**2/7 - 3*yx**2*zz**2/7 - 12*yx*yz*zx*zz/7 + yy**4/14 - 3*yy**2*yz**2/7 - 3*yy**2*zy**2/7 + 3*yy**2*zz**2/7 + 12*yy*yz*zy*zz/7 - 3*yz**2*zx**2/7 + 3*yz**2*zy**2/7 - D[4][l+ 2,l+ 3] = sqrt(14)*(-xx**3*xz + 3*xx**2*zx*zz + 3*xx*xy**2*xz - 6*xx*xy*zy*zz + 3*xx*xz*zx**2 - 3*xx*xz*zy**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + yx**3*yz - 3*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz - 3*yx*yz*zx**2 + 3*yx*yz*zy**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy)/14 - D[4][l+ 2,l+ 4] = sqrt(7)*(-xx**4 + 6*xx**2*xy**2 + 6*xx**2*zx**2 - 6*xx**2*zy**2 - 24*xx*xy*zx*zy - xy**4 - 6*xy**2*zx**2 + 6*xy**2*zy**2 + yx**4 - 6*yx**2*yy**2 - 6*yx**2*zx**2 + 6*yx**2*zy**2 + 24*yx*yy*zx*zy + yy**4 + 6*yy**2*zx**2 - 6*yy**2*zy**2)/28 - D[4][l+ 3,l+-4] = sqrt(2)*(xx**3*zy + 3*xx**2*xy*zx - 
3*xx*xy**2*zy - 3*xx*yx**2*zy - 6*xx*yx*yy*zx + 3*xx*yy**2*zy - xy**3*zx - 3*xy*yx**2*zx + 6*xy*yx*yy*zy + 3*xy*yy**2*zx)/4 - D[4][l+ 3,l+-3] = 3*xx**2*xy*zz/4 + 3*xx**2*xz*zy/4 + 3*xx*xy*xz*zx/2 - 3*xx*yx*yy*zz/2 - 3*xx*yx*yz*zy/2 - 3*xx*yy*yz*zx/2 - xy**3*zz/4 - 3*xy**2*xz*zy/4 - 3*xy*yx**2*zz/4 - 3*xy*yx*yz*zx/2 + 3*xy*yy**2*zz/4 + 3*xy*yy*yz*zy/2 - 3*xz*yx**2*zy/4 - 3*xz*yx*yy*zx/2 + 3*xz*yy**2*zy/4 - D[4][l+ 3,l+-2] = sqrt(14)*(-xx**3*zy - 3*xx**2*xy*zx - 3*xx*xy**2*zy + 12*xx*xy*xz*zz + 6*xx*xz**2*zy + 3*xx*yx**2*zy + 6*xx*yx*yy*zx + 3*xx*yy**2*zy - 12*xx*yy*yz*zz - 6*xx*yz**2*zy - xy**3*zx + 6*xy*xz**2*zx + 3*xy*yx**2*zx + 6*xy*yx*yy*zy - 12*xy*yx*yz*zz + 3*xy*yy**2*zx - 6*xy*yz**2*zx - 12*xz*yx*yy*zz - 12*xz*yx*yz*zy - 12*xz*yy*yz*zx)/28 - D[4][l+ 3,l+-1] = sqrt(7)*(-3*xx**2*xy*zz - 3*xx**2*xz*zy - 6*xx*xy*xz*zx + 6*xx*yx*yy*zz + 6*xx*yx*yz*zy + 6*xx*yy*yz*zx - 3*xy**3*zz - 9*xy**2*xz*zy + 12*xy*xz**2*zz + 3*xy*yx**2*zz + 6*xy*yx*yz*zx + 9*xy*yy**2*zz + 18*xy*yy*yz*zy - 12*xy*yz**2*zz + 4*xz**3*zy + 3*xz*yx**2*zy + 6*xz*yx*yy*zx + 9*xz*yy**2*zy - 24*xz*yy*yz*zz - 12*xz*yz**2*zy)/28 - D[4][l+ 3,l+ 0] = sqrt(70)*(3*xx**3*zx + 3*xx**2*xy*zy - 12*xx**2*xz*zz + 3*xx*xy**2*zx - 12*xx*xz**2*zx - 9*xx*yx**2*zx - 6*xx*yx*yy*zy + 24*xx*yx*yz*zz - 3*xx*yy**2*zx + 12*xx*yz**2*zx + 3*xy**3*zy - 12*xy**2*xz*zz - 12*xy*xz**2*zy - 3*xy*yx**2*zy - 6*xy*yx*yy*zx - 9*xy*yy**2*zy + 24*xy*yy*yz*zz + 12*xy*yz**2*zy + 8*xz**3*zz + 12*xz*yx**2*zz + 24*xz*yx*yz*zx + 12*xz*yy**2*zz + 24*xz*yy*yz*zy - 24*xz*yz**2*zz)/140 - D[4][l+ 3,l+ 1] = sqrt(7)*(-3*xx**3*zz - 9*xx**2*xz*zx - 3*xx*xy**2*zz - 6*xx*xy*xz*zy + 12*xx*xz**2*zz + 9*xx*yx**2*zz + 18*xx*yx*yz*zx + 3*xx*yy**2*zz + 6*xx*yy*yz*zy - 12*xx*yz**2*zz - 3*xy**2*xz*zx + 6*xy*yx*yy*zz + 6*xy*yx*yz*zy + 6*xy*yy*yz*zx + 4*xz**3*zx + 9*xz*yx**2*zx + 6*xz*yx*yy*zy - 24*xz*yx*yz*zz + 3*xz*yy**2*zx - 12*xz*yz**2*zx)/28 - D[4][l+ 3,l+ 2] = sqrt(14)*(-xx**3*zx + 3*xx**2*xz*zz + 3*xx*xz**2*zx + 3*xx*yx**2*zx - 6*xx*yx*yz*zz - 3*xx*yz**2*zx 
+ xy**3*zy - 3*xy**2*xz*zz - 3*xy*xz**2*zy - 3*xy*yy**2*zy + 6*xy*yy*yz*zz + 3*xy*yz**2*zy - 3*xz*yx**2*zz - 6*xz*yx*yz*zx + 3*xz*yy**2*zz + 6*xz*yy*yz*zy)/14 - D[4][l+ 3,l+ 3] = xx**3*zz/4 + 3*xx**2*xz*zx/4 - 3*xx*xy**2*zz/4 - 3*xx*xy*xz*zy/2 - 3*xx*yx**2*zz/4 - 3*xx*yx*yz*zx/2 + 3*xx*yy**2*zz/4 + 3*xx*yy*yz*zy/2 - 3*xy**2*xz*zx/4 + 3*xy*yx*yy*zz/2 + 3*xy*yx*yz*zy/2 + 3*xy*yy*yz*zx/2 - 3*xz*yx**2*zx/4 + 3*xz*yx*yy*zy/2 + 3*xz*yy**2*zx/4 - D[4][l+ 3,l+ 4] = sqrt(2)*(xx**3*zx - 3*xx**2*xy*zy - 3*xx*xy**2*zx - 3*xx*yx**2*zx + 6*xx*yx*yy*zy + 3*xx*yy**2*zx + xy**3*zy + 3*xy*yx**2*zy + 6*xy*yx*yy*zx - 3*xy*yy**2*zy)/4 - D[4][l+ 4,l+-4] = xx**3*xy/2 - 3*xx**2*yx*yy/2 - xx*xy**3/2 - 3*xx*xy*yx**2/2 + 3*xx*xy*yy**2/2 + 3*xy**2*yx*yy/2 + yx**3*yy/2 - yx*yy**3/2 - D[4][l+ 4,l+-3] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*yy*yz - 6*xx*xy*yx*yz - 6*xx*xz*yx*yy - xy**3*xz + 3*xy**2*yy*yz - 3*xy*xz*yx**2 + 3*xy*xz*yy**2 + 3*yx**2*yy*yz - yy**3*yz)/4 - D[4][l+ 4,l+-2] = sqrt(7)*(-xx**3*xy + 3*xx**2*yx*yy - xx*xy**3 + 6*xx*xy*xz**2 + 3*xx*xy*yx**2 + 3*xx*xy*yy**2 - 6*xx*xy*yz**2 - 12*xx*xz*yy*yz + 3*xy**2*yx*yy - 12*xy*xz*yx*yz - 6*xz**2*yx*yy - yx**3*yy - yx*yy**3 + 6*yx*yy*yz**2)/14 - D[4][l+ 4,l+-1] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*yy*yz + 6*xx*xy*yx*yz + 6*xx*xz*yx*yy - 3*xy**3*xz + 9*xy**2*yy*yz + 4*xy*xz**3 + 3*xy*xz*yx**2 + 9*xy*xz*yy**2 - 12*xy*xz*yz**2 - 12*xz**2*yy*yz - 3*yx**2*yy*yz - 3*yy**3*yz + 4*yy*yz**3)/28 - D[4][l+ 4,l+ 0] = sqrt(35)*(3*xx**4 + 6*xx**2*xy**2 - 24*xx**2*xz**2 - 18*xx**2*yx**2 - 6*xx**2*yy**2 + 24*xx**2*yz**2 - 24*xx*xy*yx*yy + 96*xx*xz*yx*yz + 3*xy**4 - 24*xy**2*xz**2 - 6*xy**2*yx**2 - 18*xy**2*yy**2 + 24*xy**2*yz**2 + 96*xy*xz*yy*yz + 8*xz**4 + 24*xz**2*yx**2 + 24*xz**2*yy**2 - 48*xz**2*yz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 + 3*yy**4 - 24*yy**2*yz**2 + 8*yz**4)/280 - D[4][l+ 4,l+ 1] = sqrt(14)*(-3*xx**3*xz + 9*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz + 4*xx*xz**3 + 9*xx*xz*yx**2 + 3*xx*xz*yy**2 - 12*xx*xz*yz**2 + 3*xy**2*yx*yz + 
6*xy*xz*yx*yy - 12*xz**2*yx*yz - 3*yx**3*yz - 3*yx*yy**2*yz + 4*yx*yz**3)/28 - D[4][l+ 4,l+ 2] = sqrt(7)*(-xx**4 + 6*xx**2*xz**2 + 6*xx**2*yx**2 - 6*xx**2*yz**2 - 24*xx*xz*yx*yz + xy**4 - 6*xy**2*xz**2 - 6*xy**2*yy**2 + 6*xy**2*yz**2 + 24*xy*xz*yy*yz - 6*xz**2*yx**2 + 6*xz**2*yy**2 - yx**4 + 6*yx**2*yz**2 + yy**4 - 6*yy**2*yz**2)/28 - D[4][l+ 4,l+ 3] = sqrt(2)*(xx**3*xz - 3*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz - 3*xx*xz*yx**2 + 3*xx*xz*yy**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy + yx**3*yz - 3*yx*yy**2*yz)/4 - D[4][l+ 4,l+ 4] = xx**4/8 - 3*xx**2*xy**2/4 - 3*xx**2*yx**2/4 + 3*xx**2*yy**2/4 + 3*xx*xy*yx*yy + xy**4/8 + 3*xy**2*yx**2/4 - 3*xy**2*yy**2/4 + yx**4/8 - 3*yx**2*yy**2/4 + yy**4/8 - - if lmax > 4: - raise NotImplementedError(f'Too a big {lmax=}') - - return D + """Generate real Wigner D-matrices for spatial rotation of spherical harmonics. + + Computes rotation matrices D^l for angular momenta l = 0 to lmax, where + D^l[m1, m2] transforms spherical harmonics under the specified rotation. + The rotation is defined by new axes x' = xyz[0], y' = xyz[1], z' = xyz[2]. + The code is generated by `mathutils/wigner.py`. + + Args: + xyz (numpy ndarray): 3x3 rotation matrix with rows defining new [x', y', z'] axes. + lmax (int): Maximum angular momentum (supports lmax <= 4). + order (str): Ordering convention for l=1 spherical harmonics. Defaults to 'xyz'. + + Returns: + list: List of numpy ndarrays D[l] where D[l] is the (2l+1) x (2l+1) real Wigner + D-matrix for angular momentum l. Note: m1 index is rotated (D is transposed). + + Raises: + NotImplementedError: If lmax > 4. + + Note: + The matrices are computed using explicit algebraic expressions for each l. 
+ """ + xx = xyz[0,0]; xy = xyz[0,1]; xz = xyz[0,2] + yx = xyz[1,0]; yy = xyz[1,1]; yz = xyz[1,2] + zx = xyz[2,0]; zy = xyz[2,1]; zz = xyz[2,2] + + SQRT3 = sqrt(3.0) + + D = [np.zeros((2*l+1,2*l+1)) for l in range(lmax+1)] + + D[0][0,0] = 1.0 + + if lmax < 1: + return D + + l=1 + if order=='yzx': # -1 0 1 + D[1][l+ -1,l+ -1] = yy + D[1][l+ -1,l+ 0] = yz + D[1][l+ -1,l+ 1] = yx + D[1][l+ 0,l+ -1] = zy + D[1][l+ 0,l+ 0] = zz + D[1][l+ 0,l+ 1] = zx + D[1][l+ 1,l+ -1] = xy + D[1][l+ 1,l+ 0] = xz + D[1][l+ 1,l+ 1] = xx + elif order=='xyz': # 1 -1 0 + D[1][ 0, 0] = xx + D[1][ 0, 1] = xy + D[1][ 0, 2] = xz + D[1][ 1, 0] = yx + D[1][ 1, 1] = yy + D[1][ 1, 2] = yz + D[1][ 2, 0] = zx + D[1][ 2, 1] = zy + D[1][ 2, 2] = zz + + if lmax < 2: + return D + + l=2 + D[2][l+ -2,l+ -2] = xx*yy+xy*yx + D[2][l+ -2,l+ -1] = xy*yz+xz*yy + D[2][l+ -2,l+ 0] = xz*yz * SQRT3 + D[2][l+ -2,l+ 1] = xx*yz+xz*yx + D[2][l+ -2,l+ 2] = xx*yx-xy*yy + D[2][l+ -1,l+ -2] = yx*zy+yy*zx + D[2][l+ -1,l+ -1] = yy*zz+yz*zy + D[2][l+ -1,l+ 0] = yz*zz * SQRT3 + D[2][l+ -1,l+ 1] = yx*zz+yz*zx + D[2][l+ -1,l+ 2] = yx*zx-yy*zy + D[2][l+ 0,l+ -2] = zx*zy * SQRT3 + D[2][l+ 0,l+ -1] = zy*zz * SQRT3 + D[2][l+ 0,l+ 0] = 1.5*zz*zz - 0.5 + D[2][l+ 0,l+ 1] = zx*zz * SQRT3 + D[2][l+ 0,l+ 2] = (zx*zx-zy*zy) * 0.5 * SQRT3 + D[2][l+ 1,l+ -2] = xx*zy+xy*zx + D[2][l+ 1,l+ -1] = xy*zz+xz*zy + D[2][l+ 1,l+ 0] = xz*zz * SQRT3 + D[2][l+ 1,l+ 1] = xx*zz+xz*zx + D[2][l+ 1,l+ 2] = xx*zx-xy*zy + D[2][l+ 2,l+ -2] = xx*xy-yx*yy + D[2][l+ 2,l+ -1] = xy*xz-yy*yz + D[2][l+ 2,l+ 0] = (xz*xz-yz*yz) * 0.5 * SQRT3 + D[2][l+ 2,l+ 1] = xx*xz-yx*yz + D[2][l+ 2,l+ 2] = (xx*xx-xy*xy+yy*yy-yx*yx) * 0.5 + + if lmax < 3: + return D + + l=3 + D[3][l+ -3,l+ -3] = 3*xx**2*yy/4 + 3*xx*xy*yx/2 - 3*xy**2*yy/4 - 3*yx**2*yy/4 + yy**3/4 + D[3][l+ -3,l+ -2] = sqrt(6)*(xx*xy*yz + xx*xz*yy + xy*xz*yx - yx*yy*yz)/2 + D[3][l+ -3,l+ -1] = sqrt(15)*(-xx**2*yy - 2*xx*xy*yx - 3*xy**2*yy + 8*xy*xz*yz + 4*xz**2*yy + yx**2*yy + yy**3 - 4*yy*yz**2)/20 + D[3][l+ -3,l+ 0] = 
sqrt(10)*(-3*xx**2*yz - 6*xx*xz*yx - 3*xy**2*yz - 6*xy*xz*yy + 6*xz**2*yz + 3*yx**2*yz + 3*yy**2*yz - 2*yz**3)/20 + D[3][l+ -3,l+ 1] = sqrt(15)*(-3*xx**2*yx - 2*xx*xy*yy + 8*xx*xz*yz - xy**2*yx + 4*xz**2*yx + yx**3 + yx*yy**2 - 4*yx*yz**2)/20 + D[3][l+ -3,l+ 2] = sqrt(6)*(xx**2*yz + 2*xx*xz*yx - xy**2*yz - 2*xy*xz*yy - yx**2*yz + yy**2*yz)/4 + D[3][l+ -3,l+ 3] = 3*xx**2*yx/4 - 3*xx*xy*yy/2 - 3*xy**2*yx/4 - yx**3/4 + 3*yx*yy**2/4 + D[3][l+ -2,l+ -3] = sqrt(6)*(xx*yx*zy + xx*yy*zx + xy*yx*zx - xy*yy*zy)/2 + D[3][l+ -2,l+ -2] = xx*yy*zz + xx*yz*zy + xy*yx*zz + xy*yz*zx + xz*yx*zy + xz*yy*zx + D[3][l+ -2,l+ -1] = sqrt(10)*(-xx*yx*zy - xx*yy*zx - xy*yx*zx - 3*xy*yy*zy + 4*xy*yz*zz + 4*xz*yy*zz + 4*xz*yz*zy)/10 + D[3][l+ -2,l+ 0] = sqrt(15)*(-xx*yx*zz - xx*yz*zx - xy*yy*zz - xy*yz*zy - xz*yx*zx - xz*yy*zy + 2*xz*yz*zz)/5 + D[3][l+ -2,l+ 1] = sqrt(10)*(-3*xx*yx*zx - xx*yy*zy + 4*xx*yz*zz - xy*yx*zy - xy*yy*zx + 4*xz*yx*zz + 4*xz*yz*zx)/10 + D[3][l+ -2,l+ 2] = xx*yx*zz + xx*yz*zx - xy*yy*zz - xy*yz*zy + xz*yx*zx - xz*yy*zy + D[3][l+ -2,l+ 3] = sqrt(6)*(xx*yx*zx - xx*yy*zy - xy*yx*zy - xy*yy*zx)/2 + D[3][l+ -1,l+ -3] = sqrt(15)*(2*yx*zx*zy + yy*zx**2 - yy*zy**2)/4 + D[3][l+ -1,l+ -2] = sqrt(10)*(yx*zy*zz + yy*zx*zz + yz*zx*zy)/2 + D[3][l+ -1,l+ -1] = -yx*zx*zy/2 - yy*zx**2/4 - 3*yy*zy**2/4 + yy*zz**2 + 2*yz*zy*zz + D[3][l+ -1,l+ 0] = sqrt(6)*(-2*yx*zx*zz - 2*yy*zy*zz - yz*zx**2 - yz*zy**2 + 2*yz*zz**2)/4 + D[3][l+ -1,l+ 1] = -3*yx*zx**2/4 - yx*zy**2/4 + yx*zz**2 - yy*zx*zy/2 + 2*yz*zx*zz + D[3][l+ -1,l+ 2] = sqrt(10)*(2*yx*zx*zz - 2*yy*zy*zz + yz*zx**2 - yz*zy**2)/4 + D[3][l+ -1,l+ 3] = sqrt(15)*(yx*zx**2 - yx*zy**2 - 2*yy*zx*zy)/4 + D[3][l+ 0,l+ -3] = sqrt(10)*zy*(3*zx**2 - zy**2)/4 + D[3][l+ 0,l+ -2] = sqrt(15)*zx*zy*zz + D[3][l+ 0,l+ -1] = sqrt(6)*zy*(5*zz**2 - 1)/4 + D[3][l+ 0,l+ 0] = zz*(-3*zx**2 - 3*zy**2 + 2*zz**2)/2 + D[3][l+ 0,l+ 1] = sqrt(6)*zx*(5*zz**2 - 1)/4 + D[3][l+ 0,l+ 2] = sqrt(15)*zz*(zx - zy)*(zx + zy)/2 + D[3][l+ 0,l+ 3] = sqrt(10)*zx*(zx**2 - 3*zy**2)/4 
+ D[3][l+ 1,l+ -3] = sqrt(15)*(2*xx*zx*zy + xy*zx**2 - xy*zy**2)/4 + D[3][l+ 1,l+ -2] = sqrt(10)*(xx*zy*zz + xy*zx*zz + xz*zx*zy)/2 + D[3][l+ 1,l+ -1] = -xx*zx*zy/2 - xy*zx**2/4 - 3*xy*zy**2/4 + xy*zz**2 + 2*xz*zy*zz + D[3][l+ 1,l+ 0] = sqrt(6)*(-2*xx*zx*zz - 2*xy*zy*zz - xz*zx**2 - xz*zy**2 + 2*xz*zz**2)/4 + D[3][l+ 1,l+ 1] = -3*xx*zx**2/4 - xx*zy**2/4 + xx*zz**2 - xy*zx*zy/2 + 2*xz*zx*zz + D[3][l+ 1,l+ 2] = sqrt(10)*(2*xx*zx*zz - 2*xy*zy*zz + xz*zx**2 - xz*zy**2)/4 + D[3][l+ 1,l+ 3] = sqrt(15)*(xx*zx**2 - xx*zy**2 - 2*xy*zx*zy)/4 + D[3][l+ 2,l+ -3] = sqrt(6)*(xx**2*zy + 2*xx*xy*zx - xy**2*zy - yx**2*zy - 2*yx*yy*zx + yy**2*zy)/4 + D[3][l+ 2,l+ -2] = xx*xy*zz + xx*xz*zy + xy*xz*zx - yx*yy*zz - yx*yz*zy - yy*yz*zx + D[3][l+ 2,l+ -1] = sqrt(10)*(-xx**2*zy - 2*xx*xy*zx - 3*xy**2*zy + 8*xy*xz*zz + 4*xz**2*zy + yx**2*zy + 2*yx*yy*zx + 3*yy**2*zy - 8*yy*yz*zz - 4*yz**2*zy)/20 + D[3][l+ 2,l+ 0] = sqrt(15)*(-xx**2*zz - 2*xx*xz*zx - xy**2*zz - 2*xy*xz*zy + 2*xz**2*zz + yx**2*zz + 2*yx*yz*zx + yy**2*zz + 2*yy*yz*zy - 2*yz**2*zz)/10 + D[3][l+ 2,l+ 1] = sqrt(10)*(-3*xx**2*zx - 2*xx*xy*zy + 8*xx*xz*zz - xy**2*zx + 4*xz**2*zx + 3*yx**2*zx + 2*yx*yy*zy - 8*yx*yz*zz + yy**2*zx - 4*yz**2*zx)/20 + D[3][l+ 2,l+ 2] = xx**2*zz/2 + xx*xz*zx - xy**2*zz/2 - xy*xz*zy - yx**2*zz/2 - yx*yz*zx + yy**2*zz/2 + yy*yz*zy + D[3][l+ 2,l+ 3] = sqrt(6)*(xx**2*zx - 2*xx*xy*zy - xy**2*zx - yx**2*zx + 2*yx*yy*zy + yy**2*zx)/4 + D[3][l+ 3,l+ -3] = 3*xx**2*xy/4 - 3*xx*yx*yy/2 - xy**3/4 - 3*xy*yx**2/4 + 3*xy*yy**2/4 + D[3][l+ 3,l+ -2] = sqrt(6)*(xx*xy*xz - xx*yy*yz - xy*yx*yz - xz*yx*yy)/2 + D[3][l+ 3,l+ -1] = sqrt(15)*(-xx**2*xy + 2*xx*yx*yy - xy**3 + 4*xy*xz**2 + xy*yx**2 + 3*xy*yy**2 - 4*xy*yz**2 - 8*xz*yy*yz)/20 + D[3][l+ 3,l+ 0] = sqrt(10)*(-3*xx**2*xz + 6*xx*yx*yz - 3*xy**2*xz + 6*xy*yy*yz + 2*xz**3 + 3*xz*yx**2 + 3*xz*yy**2 - 6*xz*yz**2)/20 + D[3][l+ 3,l+ 1] = sqrt(15)*(-xx**3 - xx*xy**2 + 4*xx*xz**2 + 3*xx*yx**2 + xx*yy**2 - 4*xx*yz**2 + 2*xy*yx*yy - 8*xz*yx*yz)/20 + D[3][l+ 3,l+ 2] = 
sqrt(6)*(xx**2*xz - 2*xx*yx*yz - xy**2*xz + 2*xy*yy*yz - xz*yx**2 + xz*yy**2)/4 + D[3][l+ 3,l+ 3] = xx**3/4 - 3*xx*xy**2/4 - 3*xx*yx**2/4 + 3*xx*yy**2/4 + 3*xy*yx*yy/2 + + if lmax < 4: + return D + + l=4 + D[4][l+ -4,l+-4] = xx**3*yy/2 + 3*xx**2*xy*yx/2 - 3*xx*xy**2*yy/2 - 3*xx*yx**2*yy/2 + xx*yy**3/2 - xy**3*yx/2 - xy*yx**3/2 + 3*xy*yx*yy**2/2 + D[4][l+ -4,l+-3] = sqrt(2)*(3*xx**2*xy*yz + 3*xx**2*xz*yy + 6*xx*xy*xz*yx - 6*xx*yx*yy*yz - xy**3*yz - 3*xy**2*xz*yy - 3*xy*yx**2*yz + 3*xy*yy**2*yz - 3*xz*yx**2*yy + xz*yy**3)/4 + D[4][l+ -4,l+-2] = sqrt(7)*(-xx**3*yy - 3*xx**2*xy*yx - 3*xx*xy**2*yy + 12*xx*xy*xz*yz + 6*xx*xz**2*yy + 3*xx*yx**2*yy + xx*yy**3 - 6*xx*yy*yz**2 - xy**3*yx + 6*xy*xz**2*yx + xy*yx**3 + 3*xy*yx*yy**2 - 6*xy*yx*yz**2 - 12*xz*yx*yy*yz)/14 + D[4][l+ -4,l+-1] = sqrt(14)*(-3*xx**2*xy*yz - 3*xx**2*xz*yy - 6*xx*xy*xz*yx + 6*xx*yx*yy*yz - 3*xy**3*yz - 9*xy**2*xz*yy + 12*xy*xz**2*yz + 3*xy*yx**2*yz + 9*xy*yy**2*yz - 4*xy*yz**3 + 4*xz**3*yy + 3*xz*yx**2*yy + 3*xz*yy**3 - 12*xz*yy*yz**2)/28 + D[4][l+ -4,l+ 0] = sqrt(35)*(3*xx**3*yx + 3*xx**2*xy*yy - 12*xx**2*xz*yz + 3*xx*xy**2*yx - 12*xx*xz**2*yx - 3*xx*yx**3 - 3*xx*yx*yy**2 + 12*xx*yx*yz**2 + 3*xy**3*yy - 12*xy**2*xz*yz - 12*xy*xz**2*yy - 3*xy*yx**2*yy - 3*xy*yy**3 + 12*xy*yy*yz**2 + 8*xz**3*yz + 12*xz*yx**2*yz + 12*xz*yy**2*yz - 8*xz*yz**3)/70 + D[4][l+ -4,l+ 1] = sqrt(14)*(-3*xx**3*yz - 9*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy + 12*xx*xz**2*yz + 9*xx*yx**2*yz + 3*xx*yy**2*yz - 4*xx*yz**3 - 3*xy**2*xz*yx + 6*xy*yx*yy*yz + 4*xz**3*yx + 3*xz*yx**3 + 3*xz*yx*yy**2 - 12*xz*yx*yz**2)/28 + D[4][l+ -4,l+ 2] = sqrt(7)*(-xx**3*yx + 3*xx**2*xz*yz + 3*xx*xz**2*yx + xx*yx**3 - 3*xx*yx*yz**2 + xy**3*yy - 3*xy**2*xz*yz - 3*xy*xz**2*yy - xy*yy**3 + 3*xy*yy*yz**2 - 3*xz*yx**2*yz + 3*xz*yy**2*yz)/7 + D[4][l+ -4,l+ 3] = sqrt(2)*(xx**3*yz + 3*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy - 3*xx*yx**2*yz + 3*xx*yy**2*yz - 3*xy**2*xz*yx + 6*xy*yx*yy*yz - xz*yx**3 + 3*xz*yx*yy**2)/4 + D[4][l+ -4,l+ 4] = xx**3*yx/2 - 
3*xx**2*xy*yy/2 - 3*xx*xy**2*yx/2 - xx*yx**3/2 + 3*xx*yx*yy**2/2 + xy**3*yy/2 + 3*xy*yx**2*yy/2 - xy*yy**3/2 + D[4][l+ -3,l+-4] = sqrt(2)*(3*xx**2*yx*zy + 3*xx**2*yy*zx + 6*xx*xy*yx*zx - 6*xx*xy*yy*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx - yx**3*zy - 3*yx**2*yy*zx + 3*yx*yy**2*zy + yy**3*zx)/4 + D[4][l+ -3,l+-3] = 3*xx**2*yy*zz/4 + 3*xx**2*yz*zy/4 + 3*xx*xy*yx*zz/2 + 3*xx*xy*yz*zx/2 + 3*xx*xz*yx*zy/2 + 3*xx*xz*yy*zx/2 - 3*xy**2*yy*zz/4 - 3*xy**2*yz*zy/4 + 3*xy*xz*yx*zx/2 - 3*xy*xz*yy*zy/2 - 3*yx**2*yy*zz/4 - 3*yx**2*yz*zy/4 - 3*yx*yy*yz*zx/2 + yy**3*zz/4 + 3*yy**2*yz*zy/4 + D[4][l+ -3,l+-2] = sqrt(14)*(-3*xx**2*yx*zy - 3*xx**2*yy*zx - 6*xx*xy*yx*zx - 6*xx*xy*yy*zy + 12*xx*xy*yz*zz + 12*xx*xz*yy*zz + 12*xx*xz*yz*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx + 12*xy*xz*yx*zz + 12*xy*xz*yz*zx + 6*xz**2*yx*zy + 6*xz**2*yy*zx + yx**3*zy + 3*yx**2*yy*zx + 3*yx*yy**2*zy - 12*yx*yy*yz*zz - 6*yx*yz**2*zy + yy**3*zx - 6*yy*yz**2*zx)/28 + D[4][l+ -3,l+-1] = sqrt(7)*(-3*xx**2*yy*zz - 3*xx**2*yz*zy - 6*xx*xy*yx*zz - 6*xx*xy*yz*zx - 6*xx*xz*yx*zy - 6*xx*xz*yy*zx - 9*xy**2*yy*zz - 9*xy**2*yz*zy - 6*xy*xz*yx*zx - 18*xy*xz*yy*zy + 24*xy*xz*yz*zz + 12*xz**2*yy*zz + 12*xz**2*yz*zy + 3*yx**2*yy*zz + 3*yx**2*yz*zy + 6*yx*yy*yz*zx + 3*yy**3*zz + 9*yy**2*yz*zy - 12*yy*yz**2*zz - 4*yz**3*zy)/28 + D[4][l+ -3,l+ 0] = sqrt(70)*(9*xx**2*yx*zx + 3*xx**2*yy*zy - 12*xx**2*yz*zz + 6*xx*xy*yx*zy + 6*xx*xy*yy*zx - 24*xx*xz*yx*zz - 24*xx*xz*yz*zx + 3*xy**2*yx*zx + 9*xy**2*yy*zy - 12*xy**2*yz*zz - 24*xy*xz*yy*zz - 24*xy*xz*yz*zy - 12*xz**2*yx*zx - 12*xz**2*yy*zy + 24*xz**2*yz*zz - 3*yx**3*zx - 3*yx**2*yy*zy + 12*yx**2*yz*zz - 3*yx*yy**2*zx + 12*yx*yz**2*zx - 3*yy**3*zy + 12*yy**2*yz*zz + 12*yy*yz**2*zy - 8*yz**3*zz)/140 + D[4][l+ -3,l+ 1] = sqrt(7)*(-9*xx**2*yx*zz - 9*xx**2*yz*zx - 6*xx*xy*yy*zz - 6*xx*xy*yz*zy - 18*xx*xz*yx*zx - 6*xx*xz*yy*zy + 24*xx*xz*yz*zz - 3*xy**2*yx*zz - 3*xy**2*yz*zx - 6*xy*xz*yx*zy - 6*xy*xz*yy*zx + 12*xz**2*yx*zz + 12*xz**2*yz*zx + 3*yx**3*zz + 9*yx**2*yz*zx + 3*yx*yy**2*zz + 6*yx*yy*yz*zy 
- 12*yx*yz**2*zz + 3*yy**2*yz*zx - 4*yz**3*zx)/28 + D[4][l+ -3,l+ 2] = sqrt(14)*(-3*xx**2*yx*zx + 3*xx**2*yz*zz + 6*xx*xz*yx*zz + 6*xx*xz*yz*zx + 3*xy**2*yy*zy - 3*xy**2*yz*zz - 6*xy*xz*yy*zz - 6*xy*xz*yz*zy + 3*xz**2*yx*zx - 3*xz**2*yy*zy + yx**3*zx - 3*yx**2*yz*zz - 3*yx*yz**2*zx - yy**3*zy + 3*yy**2*yz*zz + 3*yy*yz**2*zy)/14 + D[4][l+ -3,l+ 3] = 3*xx**2*yx*zz/4 + 3*xx**2*yz*zx/4 - 3*xx*xy*yy*zz/2 - 3*xx*xy*yz*zy/2 + 3*xx*xz*yx*zx/2 - 3*xx*xz*yy*zy/2 - 3*xy**2*yx*zz/4 - 3*xy**2*yz*zx/4 - 3*xy*xz*yx*zy/2 - 3*xy*xz*yy*zx/2 - yx**3*zz/4 - 3*yx**2*yz*zx/4 + 3*yx*yy**2*zz/4 + 3*yx*yy*yz*zy/2 + 3*yy**2*yz*zx/4 + D[4][l+ -3,l+ 4] = sqrt(2)*(3*xx**2*yx*zx - 3*xx**2*yy*zy - 6*xx*xy*yx*zy - 6*xx*xy*yy*zx - 3*xy**2*yx*zx + 3*xy**2*yy*zy - yx**3*zx + 3*yx**2*yy*zy + 3*yx*yy**2*zx - yy**3*zy)/4 + D[4][l+ -2,l+-4] = sqrt(7)*(2*xx*yx*zx*zy + xx*yy*zx**2 - xx*yy*zy**2 + xy*yx*zx**2 - xy*yx*zy**2 - 2*xy*yy*zx*zy)/2 + D[4][l+ -2,l+-3] = sqrt(14)*(2*xx*yx*zy*zz + 2*xx*yy*zx*zz + 2*xx*yz*zx*zy + 2*xy*yx*zx*zz - 2*xy*yy*zy*zz + xy*yz*zx**2 - xy*yz*zy**2 + 2*xz*yx*zx*zy + xz*yy*zx**2 - xz*yy*zy**2)/4 + D[4][l+ -2,l+-2] = -xx*yx*zx*zy - xx*yy*zx**2/2 - xx*yy*zy**2/2 + xx*yy*zz**2 + 2*xx*yz*zy*zz - xy*yx*zx**2/2 - xy*yx*zy**2/2 + xy*yx*zz**2 - xy*yy*zx*zy + 2*xy*yz*zx*zz + 2*xz*yx*zy*zz + 2*xz*yy*zx*zz + 2*xz*yz*zx*zy + D[4][l+ -2,l+-1] = sqrt(2)*(-2*xx*yx*zy*zz - 2*xx*yy*zx*zz - 2*xx*yz*zx*zy - 2*xy*yx*zx*zz - 6*xy*yy*zy*zz - xy*yz*zx**2 - 3*xy*yz*zy**2 + 4*xy*yz*zz**2 - 2*xz*yx*zx*zy - xz*yy*zx**2 - 3*xz*yy*zy**2 + 4*xz*yy*zz**2 + 8*xz*yz*zy*zz)/4 + D[4][l+ -2,l+ 0] = sqrt(5)*(3*xx*yx*zx**2 + xx*yx*zy**2 - 4*xx*yx*zz**2 + 2*xx*yy*zx*zy - 8*xx*yz*zx*zz + 2*xy*yx*zx*zy + xy*yy*zx**2 + 3*xy*yy*zy**2 - 4*xy*yy*zz**2 - 8*xy*yz*zy*zz - 8*xz*yx*zx*zz - 8*xz*yy*zy*zz - 4*xz*yz*zx**2 - 4*xz*yz*zy**2 + 8*xz*yz*zz**2)/10 + D[4][l+ -2,l+ 1] = sqrt(2)*(-6*xx*yx*zx*zz - 2*xx*yy*zy*zz - 3*xx*yz*zx**2 - xx*yz*zy**2 + 4*xx*yz*zz**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy - 3*xz*yx*zx**2 - 
xz*yx*zy**2 + 4*xz*yx*zz**2 - 2*xz*yy*zx*zy + 8*xz*yz*zx*zz)/4 + D[4][l+ -2,l+ 2] = -xx*yx*zx**2 + xx*yx*zz**2 + 2*xx*yz*zx*zz + xy*yy*zy**2 - xy*yy*zz**2 - 2*xy*yz*zy*zz + 2*xz*yx*zx*zz - 2*xz*yy*zy*zz + xz*yz*zx**2 - xz*yz*zy**2 + D[4][l+ -2,l+ 3] = sqrt(14)*(2*xx*yx*zx*zz - 2*xx*yy*zy*zz + xx*yz*zx**2 - xx*yz*zy**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy + xz*yx*zx**2 - xz*yx*zy**2 - 2*xz*yy*zx*zy)/4 + D[4][l+ -2,l+ 4] = sqrt(7)*(xx*yx*zx**2 - xx*yx*zy**2 - 2*xx*yy*zx*zy - 2*xy*yx*zx*zy - xy*yy*zx**2 + xy*yy*zy**2)/2 + D[4][l+ -1,l+-4] = sqrt(14)*(3*yx*zx**2*zy - yx*zy**3 + yy*zx**3 - 3*yy*zx*zy**2)/4 + D[4][l+ -1,l+-3] = sqrt(7)*(6*yx*zx*zy*zz + 3*yy*zx**2*zz - 3*yy*zy**2*zz + 3*yz*zx**2*zy - yz*zy**3)/4 + D[4][l+ -1,l+-2] = sqrt(2)*(-3*yx*zx**2*zy - yx*zy**3 + 6*yx*zy*zz**2 - yy*zx**3 - 3*yy*zx*zy**2 + 6*yy*zx*zz**2 + 12*yz*zx*zy*zz)/4 + D[4][l+ -1,l+-1] = -3*yx*zx*zy*zz/2 - 3*yy*zx**2*zz/4 - 9*yy*zy**2*zz/4 + yy*zz**3 - 3*yz*zx**2*zy/4 - 3*yz*zy**3/4 + 3*yz*zy*zz**2 + D[4][l+ -1,l+ 0] = sqrt(10)*(3*yx*zx**3 + 3*yx*zx*zy**2 - 12*yx*zx*zz**2 + 3*yy*zx**2*zy + 3*yy*zy**3 - 12*yy*zy*zz**2 - 12*yz*zx**2*zz - 12*yz*zy**2*zz + 8*yz*zz**3)/20 + D[4][l+ -1,l+ 1] = -9*yx*zx**2*zz/4 - 3*yx*zy**2*zz/4 + yx*zz**3 - 3*yy*zx*zy*zz/2 - 3*yz*zx**3/4 - 3*yz*zx*zy**2/4 + 3*yz*zx*zz**2 + D[4][l+ -1,l+ 2] = sqrt(2)*(-yx*zx**3 + 3*yx*zx*zz**2 + yy*zy**3 - 3*yy*zy*zz**2 + 3*yz*zx**2*zz - 3*yz*zy**2*zz)/2 + D[4][l+ -1,l+ 3] = sqrt(7)*(3*yx*zx**2*zz - 3*yx*zy**2*zz - 6*yy*zx*zy*zz + yz*zx**3 - 3*yz*zx*zy**2)/4 + D[4][l+ -1,l+ 4] = sqrt(14)*(yx*zx**3 - 3*yx*zx*zy**2 - 3*yy*zx**2*zy + yy*zy**3)/4 + D[4][l+ 0,l+-4] = sqrt(35)*zx*zy*(zx - zy)*(zx + zy)/2 + D[4][l+ 0,l+-3] = sqrt(70)*zy*zz*(3*zx**2 - zy**2)/4 + D[4][l+ 0,l+-2] = sqrt(5)*zx*zy*(7*zz**2 - 1)/2 + D[4][l+ 0,l+-1] = sqrt(10)*zy*zz*(-3*zx**2 - 3*zy**2 + 4*zz**2)/4 + D[4][l+ 0,l+ 0] = 3*zx**4/8 + 3*zx**2*zy**2/4 - 3*zx**2*zz**2 + 3*zy**4/8 - 3*zy**2*zz**2 + zz**4 + D[4][l+ 0,l+ 1] = sqrt(10)*zx*zz*(-3*zx**2 - 3*zy**2 + 
4*zz**2)/4 + D[4][l+ 0,l+ 2] = sqrt(5)*(zx - zy)*(zx + zy)*(7*zz**2 - 1)/4 + D[4][l+ 0,l+ 3] = sqrt(70)*zx*zz*(zx**2 - 3*zy**2)/4 + D[4][l+ 0,l+ 4] = sqrt(35)*(zx**4 - 6*zx**2*zy**2 + zy**4)/8 + D[4][l+ 1,l+-4] = sqrt(14)*(3*xx*zx**2*zy - xx*zy**3 + xy*zx**3 - 3*xy*zx*zy**2)/4 + D[4][l+ 1,l+-3] = sqrt(7)*(6*xx*zx*zy*zz + 3*xy*zx**2*zz - 3*xy*zy**2*zz + 3*xz*zx**2*zy - xz*zy**3)/4 + D[4][l+ 1,l+-2] = sqrt(2)*(-3*xx*zx**2*zy - xx*zy**3 + 6*xx*zy*zz**2 - xy*zx**3 - 3*xy*zx*zy**2 + 6*xy*zx*zz**2 + 12*xz*zx*zy*zz)/4 + D[4][l+ 1,l+-1] = -3*xx*zx*zy*zz/2 - 3*xy*zx**2*zz/4 - 9*xy*zy**2*zz/4 + xy*zz**3 - 3*xz*zx**2*zy/4 - 3*xz*zy**3/4 + 3*xz*zy*zz**2 + D[4][l+ 1,l+ 0] = sqrt(10)*(3*xx*zx**3 + 3*xx*zx*zy**2 - 12*xx*zx*zz**2 + 3*xy*zx**2*zy + 3*xy*zy**3 - 12*xy*zy*zz**2 - 12*xz*zx**2*zz - 12*xz*zy**2*zz + 8*xz*zz**3)/20 + D[4][l+ 1,l+ 1] = -9*xx*zx**2*zz/4 - 3*xx*zy**2*zz/4 + xx*zz**3 - 3*xy*zx*zy*zz/2 - 3*xz*zx**3/4 - 3*xz*zx*zy**2/4 + 3*xz*zx*zz**2 + D[4][l+ 1,l+ 2] = sqrt(2)*(-xx*zx**3 + 3*xx*zx*zz**2 + xy*zy**3 - 3*xy*zy*zz**2 + 3*xz*zx**2*zz - 3*xz*zy**2*zz)/2 + D[4][l+ 1,l+ 3] = sqrt(7)*(3*xx*zx**2*zz - 3*xx*zy**2*zz - 6*xy*zx*zy*zz + xz*zx**3 - 3*xz*zx*zy**2)/4 + D[4][l+ 1,l+ 4] = sqrt(14)*(xx*zx**3 - 3*xx*zx*zy**2 - 3*xy*zx**2*zy + xy*zy**3)/4 + D[4][l+ 2,l+-4] = sqrt(7)*(-xx**3*xy + 3*xx**2*zx*zy + xx*xy**3 + 3*xx*xy*zx**2 - 3*xx*xy*zy**2 - 3*xy**2*zx*zy + yx**3*yy - 3*yx**2*zx*zy - yx*yy**3 - 3*yx*yy*zx**2 + 3*yx*yy*zy**2 + 3*yy**2*zx*zy)/7 + D[4][l+ 2,l+-3] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*zy*zz + 6*xx*xy*zx*zz + 6*xx*xz*zx*zy + xy**3*xz - 3*xy**2*zy*zz + 3*xy*xz*zx**2 - 3*xy*xz*zy**2 + 3*yx**2*yy*yz - 3*yx**2*zy*zz - 6*yx*yy*zx*zz - 6*yx*yz*zx*zy - yy**3*yz + 3*yy**2*zy*zz - 3*yy*yz*zx**2 + 3*yy*yz*zy**2)/14 + D[4][l+ 2,l+-2] = xx**3*xy/7 - 3*xx**2*zx*zy/7 + xx*xy**3/7 - 6*xx*xy*xz**2/7 - 3*xx*xy*zx**2/7 - 3*xx*xy*zy**2/7 + 6*xx*xy*zz**2/7 + 12*xx*xz*zy*zz/7 - 3*xy**2*zx*zy/7 + 12*xy*xz*zx*zz/7 + 6*xz**2*zx*zy/7 - yx**3*yy/7 + 3*yx**2*zx*zy/7 - yx*yy**3/7 + 
6*yx*yy*yz**2/7 + 3*yx*yy*zx**2/7 + 3*yx*yy*zy**2/7 - 6*yx*yy*zz**2/7 - 12*yx*yz*zy*zz/7 + 3*yy**2*zx*zy/7 - 12*yy*yz*zx*zz/7 - 6*yz**2*zx*zy/7 + D[4][l+ 2,l+-1] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*zy*zz - 6*xx*xy*zx*zz - 6*xx*xz*zx*zy + 3*xy**3*xz - 9*xy**2*zy*zz - 4*xy*xz**3 - 3*xy*xz*zx**2 - 9*xy*xz*zy**2 + 12*xy*xz*zz**2 + 12*xz**2*zy*zz - 3*yx**2*yy*yz + 3*yx**2*zy*zz + 6*yx*yy*zx*zz + 6*yx*yz*zx*zy - 3*yy**3*yz + 9*yy**2*zy*zz + 4*yy*yz**3 + 3*yy*yz*zx**2 + 9*yy*yz*zy**2 - 12*yy*yz*zz**2 - 12*yz**2*zy*zz)/14 + D[4][l+ 2,l+ 0] = sqrt(5)*(-3*xx**4 - 6*xx**2*xy**2 + 24*xx**2*xz**2 + 18*xx**2*zx**2 + 6*xx**2*zy**2 - 24*xx**2*zz**2 + 24*xx*xy*zx*zy - 96*xx*xz*zx*zz - 3*xy**4 + 24*xy**2*xz**2 + 6*xy**2*zx**2 + 18*xy**2*zy**2 - 24*xy**2*zz**2 - 96*xy*xz*zy*zz - 8*xz**4 - 24*xz**2*zx**2 - 24*xz**2*zy**2 + 48*xz**2*zz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 - 18*yx**2*zx**2 - 6*yx**2*zy**2 + 24*yx**2*zz**2 - 24*yx*yy*zx*zy + 96*yx*yz*zx*zz + 3*yy**4 - 24*yy**2*yz**2 - 6*yy**2*zx**2 - 18*yy**2*zy**2 + 24*yy**2*zz**2 + 96*yy*yz*zy*zz + 8*yz**4 + 24*yz**2*zx**2 + 24*yz**2*zy**2 - 48*yz**2*zz**2)/140 + D[4][l+ 2,l+ 1] = sqrt(2)*(3*xx**3*xz - 9*xx**2*zx*zz + 3*xx*xy**2*xz - 6*xx*xy*zy*zz - 4*xx*xz**3 - 9*xx*xz*zx**2 - 3*xx*xz*zy**2 + 12*xx*xz*zz**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + 12*xz**2*zx*zz - 3*yx**3*yz + 9*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz + 4*yx*yz**3 + 9*yx*yz*zx**2 + 3*yx*yz*zy**2 - 12*yx*yz*zz**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy - 12*yz**2*zx*zz)/14 + D[4][l+ 2,l+ 2] = xx**4/14 - 3*xx**2*xz**2/7 - 3*xx**2*zx**2/7 + 3*xx**2*zz**2/7 + 12*xx*xz*zx*zz/7 - xy**4/14 + 3*xy**2*xz**2/7 + 3*xy**2*zy**2/7 - 3*xy**2*zz**2/7 - 12*xy*xz*zy*zz/7 + 3*xz**2*zx**2/7 - 3*xz**2*zy**2/7 - yx**4/14 + 3*yx**2*yz**2/7 + 3*yx**2*zx**2/7 - 3*yx**2*zz**2/7 - 12*yx*yz*zx*zz/7 + yy**4/14 - 3*yy**2*yz**2/7 - 3*yy**2*zy**2/7 + 3*yy**2*zz**2/7 + 12*yy*yz*zy*zz/7 - 3*yz**2*zx**2/7 + 3*yz**2*zy**2/7 + D[4][l+ 2,l+ 3] = sqrt(14)*(-xx**3*xz + 3*xx**2*zx*zz + 3*xx*xy**2*xz - 
6*xx*xy*zy*zz + 3*xx*xz*zx**2 - 3*xx*xz*zy**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + yx**3*yz - 3*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz - 3*yx*yz*zx**2 + 3*yx*yz*zy**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy)/14 + D[4][l+ 2,l+ 4] = sqrt(7)*(-xx**4 + 6*xx**2*xy**2 + 6*xx**2*zx**2 - 6*xx**2*zy**2 - 24*xx*xy*zx*zy - xy**4 - 6*xy**2*zx**2 + 6*xy**2*zy**2 + yx**4 - 6*yx**2*yy**2 - 6*yx**2*zx**2 + 6*yx**2*zy**2 + 24*yx*yy*zx*zy + yy**4 + 6*yy**2*zx**2 - 6*yy**2*zy**2)/28 + D[4][l+ 3,l+-4] = sqrt(2)*(xx**3*zy + 3*xx**2*xy*zx - 3*xx*xy**2*zy - 3*xx*yx**2*zy - 6*xx*yx*yy*zx + 3*xx*yy**2*zy - xy**3*zx - 3*xy*yx**2*zx + 6*xy*yx*yy*zy + 3*xy*yy**2*zx)/4 + D[4][l+ 3,l+-3] = 3*xx**2*xy*zz/4 + 3*xx**2*xz*zy/4 + 3*xx*xy*xz*zx/2 - 3*xx*yx*yy*zz/2 - 3*xx*yx*yz*zy/2 - 3*xx*yy*yz*zx/2 - xy**3*zz/4 - 3*xy**2*xz*zy/4 - 3*xy*yx**2*zz/4 - 3*xy*yx*yz*zx/2 + 3*xy*yy**2*zz/4 + 3*xy*yy*yz*zy/2 - 3*xz*yx**2*zy/4 - 3*xz*yx*yy*zx/2 + 3*xz*yy**2*zy/4 + D[4][l+ 3,l+-2] = sqrt(14)*(-xx**3*zy - 3*xx**2*xy*zx - 3*xx*xy**2*zy + 12*xx*xy*xz*zz + 6*xx*xz**2*zy + 3*xx*yx**2*zy + 6*xx*yx*yy*zx + 3*xx*yy**2*zy - 12*xx*yy*yz*zz - 6*xx*yz**2*zy - xy**3*zx + 6*xy*xz**2*zx + 3*xy*yx**2*zx + 6*xy*yx*yy*zy - 12*xy*yx*yz*zz + 3*xy*yy**2*zx - 6*xy*yz**2*zx - 12*xz*yx*yy*zz - 12*xz*yx*yz*zy - 12*xz*yy*yz*zx)/28 + D[4][l+ 3,l+-1] = sqrt(7)*(-3*xx**2*xy*zz - 3*xx**2*xz*zy - 6*xx*xy*xz*zx + 6*xx*yx*yy*zz + 6*xx*yx*yz*zy + 6*xx*yy*yz*zx - 3*xy**3*zz - 9*xy**2*xz*zy + 12*xy*xz**2*zz + 3*xy*yx**2*zz + 6*xy*yx*yz*zx + 9*xy*yy**2*zz + 18*xy*yy*yz*zy - 12*xy*yz**2*zz + 4*xz**3*zy + 3*xz*yx**2*zy + 6*xz*yx*yy*zx + 9*xz*yy**2*zy - 24*xz*yy*yz*zz - 12*xz*yz**2*zy)/28 + D[4][l+ 3,l+ 0] = sqrt(70)*(3*xx**3*zx + 3*xx**2*xy*zy - 12*xx**2*xz*zz + 3*xx*xy**2*zx - 12*xx*xz**2*zx - 9*xx*yx**2*zx - 6*xx*yx*yy*zy + 24*xx*yx*yz*zz - 3*xx*yy**2*zx + 12*xx*yz**2*zx + 3*xy**3*zy - 12*xy**2*xz*zz - 12*xy*xz**2*zy - 3*xy*yx**2*zy - 6*xy*yx*yy*zx - 9*xy*yy**2*zy + 24*xy*yy*yz*zz + 12*xy*yz**2*zy + 8*xz**3*zz + 12*xz*yx**2*zz + 24*xz*yx*yz*zx + 
12*xz*yy**2*zz + 24*xz*yy*yz*zy - 24*xz*yz**2*zz)/140 + D[4][l+ 3,l+ 1] = sqrt(7)*(-3*xx**3*zz - 9*xx**2*xz*zx - 3*xx*xy**2*zz - 6*xx*xy*xz*zy + 12*xx*xz**2*zz + 9*xx*yx**2*zz + 18*xx*yx*yz*zx + 3*xx*yy**2*zz + 6*xx*yy*yz*zy - 12*xx*yz**2*zz - 3*xy**2*xz*zx + 6*xy*yx*yy*zz + 6*xy*yx*yz*zy + 6*xy*yy*yz*zx + 4*xz**3*zx + 9*xz*yx**2*zx + 6*xz*yx*yy*zy - 24*xz*yx*yz*zz + 3*xz*yy**2*zx - 12*xz*yz**2*zx)/28 + D[4][l+ 3,l+ 2] = sqrt(14)*(-xx**3*zx + 3*xx**2*xz*zz + 3*xx*xz**2*zx + 3*xx*yx**2*zx - 6*xx*yx*yz*zz - 3*xx*yz**2*zx + xy**3*zy - 3*xy**2*xz*zz - 3*xy*xz**2*zy - 3*xy*yy**2*zy + 6*xy*yy*yz*zz + 3*xy*yz**2*zy - 3*xz*yx**2*zz - 6*xz*yx*yz*zx + 3*xz*yy**2*zz + 6*xz*yy*yz*zy)/14 + D[4][l+ 3,l+ 3] = xx**3*zz/4 + 3*xx**2*xz*zx/4 - 3*xx*xy**2*zz/4 - 3*xx*xy*xz*zy/2 - 3*xx*yx**2*zz/4 - 3*xx*yx*yz*zx/2 + 3*xx*yy**2*zz/4 + 3*xx*yy*yz*zy/2 - 3*xy**2*xz*zx/4 + 3*xy*yx*yy*zz/2 + 3*xy*yx*yz*zy/2 + 3*xy*yy*yz*zx/2 - 3*xz*yx**2*zx/4 + 3*xz*yx*yy*zy/2 + 3*xz*yy**2*zx/4 + D[4][l+ 3,l+ 4] = sqrt(2)*(xx**3*zx - 3*xx**2*xy*zy - 3*xx*xy**2*zx - 3*xx*yx**2*zx + 6*xx*yx*yy*zy + 3*xx*yy**2*zx + xy**3*zy + 3*xy*yx**2*zy + 6*xy*yx*yy*zx - 3*xy*yy**2*zy)/4 + D[4][l+ 4,l+-4] = xx**3*xy/2 - 3*xx**2*yx*yy/2 - xx*xy**3/2 - 3*xx*xy*yx**2/2 + 3*xx*xy*yy**2/2 + 3*xy**2*yx*yy/2 + yx**3*yy/2 - yx*yy**3/2 + D[4][l+ 4,l+-3] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*yy*yz - 6*xx*xy*yx*yz - 6*xx*xz*yx*yy - xy**3*xz + 3*xy**2*yy*yz - 3*xy*xz*yx**2 + 3*xy*xz*yy**2 + 3*yx**2*yy*yz - yy**3*yz)/4 + D[4][l+ 4,l+-2] = sqrt(7)*(-xx**3*xy + 3*xx**2*yx*yy - xx*xy**3 + 6*xx*xy*xz**2 + 3*xx*xy*yx**2 + 3*xx*xy*yy**2 - 6*xx*xy*yz**2 - 12*xx*xz*yy*yz + 3*xy**2*yx*yy - 12*xy*xz*yx*yz - 6*xz**2*yx*yy - yx**3*yy - yx*yy**3 + 6*yx*yy*yz**2)/14 + D[4][l+ 4,l+-1] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*yy*yz + 6*xx*xy*yx*yz + 6*xx*xz*yx*yy - 3*xy**3*xz + 9*xy**2*yy*yz + 4*xy*xz**3 + 3*xy*xz*yx**2 + 9*xy*xz*yy**2 - 12*xy*xz*yz**2 - 12*xz**2*yy*yz - 3*yx**2*yy*yz - 3*yy**3*yz + 4*yy*yz**3)/28 + D[4][l+ 4,l+ 0] = sqrt(35)*(3*xx**4 + 
6*xx**2*xy**2 - 24*xx**2*xz**2 - 18*xx**2*yx**2 - 6*xx**2*yy**2 + 24*xx**2*yz**2 - 24*xx*xy*yx*yy + 96*xx*xz*yx*yz + 3*xy**4 - 24*xy**2*xz**2 - 6*xy**2*yx**2 - 18*xy**2*yy**2 + 24*xy**2*yz**2 + 96*xy*xz*yy*yz + 8*xz**4 + 24*xz**2*yx**2 + 24*xz**2*yy**2 - 48*xz**2*yz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 + 3*yy**4 - 24*yy**2*yz**2 + 8*yz**4)/280 + D[4][l+ 4,l+ 1] = sqrt(14)*(-3*xx**3*xz + 9*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz + 4*xx*xz**3 + 9*xx*xz*yx**2 + 3*xx*xz*yy**2 - 12*xx*xz*yz**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy - 12*xz**2*yx*yz - 3*yx**3*yz - 3*yx*yy**2*yz + 4*yx*yz**3)/28 + D[4][l+ 4,l+ 2] = sqrt(7)*(-xx**4 + 6*xx**2*xz**2 + 6*xx**2*yx**2 - 6*xx**2*yz**2 - 24*xx*xz*yx*yz + xy**4 - 6*xy**2*xz**2 - 6*xy**2*yy**2 + 6*xy**2*yz**2 + 24*xy*xz*yy*yz - 6*xz**2*yx**2 + 6*xz**2*yy**2 - yx**4 + 6*yx**2*yz**2 + yy**4 - 6*yy**2*yz**2)/28 + D[4][l+ 4,l+ 3] = sqrt(2)*(xx**3*xz - 3*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz - 3*xx*xz*yx**2 + 3*xx*xz*yy**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy + yx**3*yz - 3*yx*yy**2*yz)/4 + D[4][l+ 4,l+ 4] = xx**4/8 - 3*xx**2*xy**2/4 - 3*xx**2*yx**2/4 + 3*xx**2*yy**2/4 + 3*xx*xy*yx*yy + xy**4/8 + 3*xy**2*yx**2/4 - 3*xy**2*yy**2/4 + yx**4/8 - 3*yx**2*yy**2/4 + yy**4/8 + + if lmax > 4: + raise NotImplementedError(f'Too a big {lmax=}') + + return D def Dmatrix_for_z(z, lmax, order='xyz'): - return Dmatrix(new_xy_axis(z), lmax, order) + """Generate Wigner D-matrices for rotation that aligns z-axis with given vector. + + Wrapper function that combines new_xy_axis() and Dmatrix() to compute + rotation matrices for a rotation defined only by the target z-direction. + Args: + z (numpy ndarray): 3D vector defining the target z-axis direction. + lmax (int): Maximum angular momentum (supports lmax <= 4). + order (str): Ordering convention for l=1 spherical harmonics. Defaults to 'xyz'. + + Returns: + list: List of Wigner D-matrices for l=0 to lmax. 
+ """ + return Dmatrix(new_xy_axis(z), lmax, order) diff --git a/qstack/spahm/rho/__init__.py b/qstack/spahm/rho/__init__.py index e69de29b..f4346753 100644 --- a/qstack/spahm/rho/__init__.py +++ b/qstack/spahm/rho/__init__.py @@ -0,0 +1 @@ +"""Atom- and bond-based SPAHM module.""" diff --git a/qstack/spahm/rho/__main__.py b/qstack/spahm/rho/__main__.py index f81982ec..d8745bad 100644 --- a/qstack/spahm/rho/__main__.py +++ b/qstack/spahm/rho/__main__.py @@ -1,4 +1,7 @@ +"""Command-line entry point for SPAHM(a,b) computation.""" + from .compute_rho_spahm import main + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/atom.py b/qstack/spahm/rho/atom.py index c4885897..0ba3bfa4 100644 --- a/qstack/spahm/rho/atom.py +++ b/qstack/spahm/rho/atom.py @@ -1,3 +1,5 @@ +"""Legacy command-line entry point for SPAHM(a) computations.""" + import numpy as np from qstack import compound from .compute_rho_spahm import get_repr @@ -5,6 +7,17 @@ def main(args=None): + """Command-line interface for computing SPAHM(a) atomic representations. + + Computes atom-centered SPAHM representations for a single molecule from an XYZ file. + The representation is based on fitted atomic densities from a guess Hamiltonian. + + Args: + args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. + + Output: + Saves representation to numpy file specified by --name argument. 
+ """ parser = SpahmParser(description='This program computes the SPAHM(a) representation for a given molecular system', atom=True) parser.add_argument('--mol', dest='mol', required=True, type=str, help="the path to the xyz file with the molecular structure") parser.add_argument('--charge', dest='charge', default=0, type=int, help='total charge of the system (default: 0)') diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index 53856cb8..1208ba55 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -1,9 +1,32 @@ +"""Atomic density computation.""" + import numpy as np from qstack import compound, fields from . import lowdin def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): + """Create atomic density representations using Löwdin partitioning and density fitting. + + Decomposes the molecular density matrix into atomic contributions using Löwdin + orthogonalization, then fits each atomic density onto auxiliary basis set. + + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): 2D density matrix in AO basis. + aux_basis (str or dict): Auxiliary basis set for density fitting. + short (bool): If True, returns only diagonal blocks (atom-centered coefficients). + Defaults to False. + w_slicing (bool): If True, uses block-diagonal Coulomb matrix (faster). + Defaults to True. + only_i (list or None): List of atom indices to compute. If None, computes all atoms. + Defaults to None. + + Returns: + list or numpy ndarray: Density fitting coefficients for each atom. 
+ - If short=False: list of 1D arrays (full aux basis per atom) + - If short=True: list of 1D arrays (atom-centered coefficients only, one slice per atom) + """ L = lowdin.Lowdin_split(mol, dm) if only_i is not None and len(only_i) > 0: @@ -34,9 +57,10 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): a_dfs.append(c_a) if short: - cc = [] - for i, c in zip(auxmol.aoslice_by_atom()[:,2:], a_dfs, strict=True): - cc.append(c[i[0]:i[1]]) - return np.hstack(cc) + if only_i is not None and len(only_i) > 0: + aoslice_by_atom = auxmol.aoslice_by_atom()[only_i,2:] + else: + aoslice_by_atom = auxmol.aoslice_by_atom()[:,2:] + return [c[i0:i1] for (i0, i1), c in zip(aoslice_by_atom, a_dfs, strict=True)] return a_dfs diff --git a/qstack/spahm/rho/bond.py b/qstack/spahm/rho/bond.py index 9b02b5eb..e332a495 100644 --- a/qstack/spahm/rho/bond.py +++ b/qstack/spahm/rho/bond.py @@ -1,3 +1,5 @@ +"""Legacy command-line entry point for SPAHM(b) computations.""" + import os import numpy as np from qstack.tools import correct_num_threads @@ -7,6 +9,17 @@ def main(args=None): + """Command-line interface for computing SPAHM(b) bond representations. + + Computes bond-centered SPAHM representations for molecules or molecular datasets. + Representations capture chemical bonding information using density fitting on bond centers. + + Args: + args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. + + Output: + Saves representations to numpy files with names based on --name argument. 
+ """ parser = SpahmParser(description='This program computes the SPAHM(b) representation for a given molecular system or a list of thereof', unified=True, bond=True) args = parser.parse_args(args=args) if args.print>0: @@ -61,5 +74,6 @@ def main(args=None): else: np.save(args.name_out + mod_suffix, modvec) + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/bond_selected.py b/qstack/spahm/rho/bond_selected.py index 929fb13d..90b30e84 100644 --- a/qstack/spahm/rho/bond_selected.py +++ b/qstack/spahm/rho/bond_selected.py @@ -1,3 +1,5 @@ +"""Representation for a specific bond in a molecule.""" + import os import numpy as np from . import utils, dmb_rep_bond as dmbb, lowdin @@ -10,7 +12,29 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, readdm=None, guess=defaults.guess, xc=defaults.xc, spin=None, cutoff=defaults.cutoff, printlevel=0, omods=defaults.omod, bpath=defaults.bpath, only_m0=False, same_basis=False): + """Compute SPAHM(b) representations for specific bonds in molecules. + + Generates bond-centered representations for user-specified atom pairs across + a dataset of molecules, useful for targeted bond analysis. + + Args: + mols (list): List of pyscf Mole objects. + bondidx (numpy ndarray): 2D array (nmols, 2) of 0-indexed atom pairs defining bonds. + xyzlist (list): List of XYZ filenames corresponding to mols. + readdm (str, optional): Directory to load pre-computed density matrices. Defaults to None. + guess (str): Guess Hamiltonian method name. Defaults to defaults.guess. + xc (str): Exchange-correlation functional. Defaults to defaults.xc. + spin (numpy ndarray, optional): Array of numbers of unpaired electrons per molecule. Defaults to None. + cutoff (float): Maximum bond distance in Å. Defaults to defaults.cutoff. + printlevel (int): Verbosity level. Defaults to 0. + omods (list): Open-shell modes (e.g. 'alpha', 'beta'). Defaults to defaults.omod. + bpath (str): Path to bond basis set directory. Defaults to defaults.bpath. 
+ only_m0 (bool): Use only m=0 basis functions. Defaults to False. + same_basis (bool): Use generic CC.bas for all pairs. Defaults to False. + Returns: + list: List of (filename, representation) tuples for each specified bond. + """ if spin is None or (spin == None).all(): omods = [None] @@ -36,6 +60,18 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, def main(): + """Command-line interface for computing SPAHM(b) representations for specific bonds. + + Reads a file listing XYZ structures and bond indices, computes representations + for each specified bond, and saves them to individual files. The input file format + is: XYZ_path atom1_index atom2_index (1-indexed). + + Args: + None: Parses command-line arguments. + + Output: + Saves bond representations to numpy files in specified directory. + """ parser = SpahmParser(description='This program computes the SPAHM(b) representation for a list of bonds', bond=True) parser.remove_argument('elements') parser.remove_argument('only_z') diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 582ccf43..234d632a 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -1,7 +1,9 @@ +"""Main computation routines for SPAHM(a,b) representations.""" + import os import itertools import numpy as np -from qstack.tools import correct_num_threads +from qstack.tools import correct_num_threads, slice_generator from . import utils, dmb_rep_bond as dmbb from . import dmb_rep_atom as dmba from .utils import defaults @@ -13,125 +15,126 @@ def spahm_a_b(rep_type, mols, dms, elements=None, only_m0=False, zeros=False, printlevel=0, auxbasis=defaults.auxbasis, model=defaults.model, pairfile=None, dump_and_exit=False, same_basis=False, only_z=None): - """ Computes SPAHM(a,b) representations for a set of molecules. + """Compute SPAHM(a) or SPAHM(b) representations for a set of molecules. + + Reference: + K. R. Briling, Y. Calvino Alonso, A. Fabrizio, C. 
Corminboeuf, + "SPAHM(a,b): Encoding the density information from guess Hamiltonian in quantum machine learning representations", + J. Chem. Theory Comput. 20 1108–1117 (2024), doi:10.1021/acs.jctc.3c01040. Args: - - rep_type (str) : the representation type ('atom' or 'bond' centered) - - mols (list): the list of molecules (pyscf.Mole objects) - - dms (list of numpy.ndarray): list of guess density matrices for each molecule - - bpath (str): path to the directory containing bond-optimized basis-functions (.bas) - - cutoff (float): the cutoff distance (angstrom) between atoms to be considered as bond - - omods (list of str): the selected mode for open-shell computations - - elements (list of str): list of all elements present in the set of molecules - - only_m0 (bool): use only basis functions with `m=0` - - zeros (bool): add zeros features for non-existing bond pairs - - printlevel (int): level of verbosity - - pairfile (str): path to the pairfile (if None, atom pairs are detected automatically) - - dump_and_exit (bool): to save pairfile for the set of molecules (without generating representaitons) - - same_basis (bool): to use the same bond-optimized basis function for all atomic pairs (ZZ.bas == CC.bas for any Z) - - only_z (list of str): restrict the atomic representations to atom types in this list + rep_type (str): Representation type: 'atom' for SPAHM(a) or 'bond' for SPAHM(b). + mols (list): List of pyscf Mole objects. + dms (list): List of density matrices (2D or 3D numpy arrays) for each molecule. + bpath (str): Directory path containing bond-optimized basis files (.bas) for SPAHM(b). Defaults to defaults.bpath. + cutoff (float): Bond cutoff distance in Å for SPAHM(b). Defaults to defaults.cutoff. + omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. + elements (list, optional): Element symbols present in dataset. Auto-detected if None. Defaults to None. 
+ only_m0 (bool): Use only m=0 angular momentum component for SPAHM(b). Defaults to False. + zeros (bool): Pad with zeros for non-existent bond pairs in SPAHM(b). Defaults to False. + printlevel (int): Verbosity level (0=silent, >0=verbose). Defaults to 0. + auxbasis (str): Auxiliary basis set for SPAHM(a). Defaults to defaults.auxbasis. + model (str): Atomic density fitting model for SPAHM(a). Defaults to defaults.model. + pairfile (str, optional): Path to atom pair file for SPAHM(b). Atom pairs are computed from mols if None. Defaults to None. + dump_and_exit (bool): Save atom pair file for SPAHM(b) to pairfile and exit without computing. Defaults to False. + same_basis (bool): Use generic CC.bas for all atom pairs for SPAHM(b). Defaults to False. + only_z (list, optional): Restrict to specific atom types. Defaults to None. Returns: - A numpy.ndarray with the atomic spahm-b representations for each molecule (Nmods,Nmolecules,NatomMax,Nfeatures). - with: - Nmods: the alpha and beta components of the representation - - Nmolecules: the number of molecules in the set - - NatomMax: the maximum number of atoms in one molecule - - Nfeatures: the number of features (for each omods) + numpy ndarray: 4D array (n_omods, n_mols, max_atoms, n_features) where: + - n_omods: Number of open-shell components (1 for closed-shell, len(omods) for open-shell) + - n_mols: Number of molecules in dataset + - max_atoms: Maximum number of atoms/bonds across all molecules + - n_features: Representation dimension """ - maxlen = 0 # This needs fixing `UnboundLocalError` if only_z is None: only_z = [] + if len(only_z) > 0: + print(f"Selecting atom-types in {only_z}") + natm = max(sum(sum(z==np.array(mol.elements)) for z in only_z) for mol in mols) + else: + natm = max(mol.natm for mol in mols) + if rep_type == 'bond': elements, mybasis, qqs0, qqs4q, idx, M = dmbb.read_basis_wrapper(mols, bpath, only_m0, printlevel, elements=elements, cutoff=cutoff, pairfile=pairfile, 
dump_and_exit=dump_and_exit, same_basis=same_basis) qqs = qqs0 if zeros else qqs4q - maxlen = max([dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements]) + maxlen = max(dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements) elif rep_type == 'atom': if elements is None: elements = set() for mol in mols: elements.update(mol.elements) elements = sorted(set(elements)) - df_wrapper, sym_wrapper = dmba.get_model(model) + df_wrapper, sym_wrapper, maxlen_fn = dmba.get_model(model) ao, ao_len, idx, M = dmba.get_basis_info(elements, auxbasis) - maxlen = sum([len(v) for v in idx.values()]) + maxlen = maxlen_fn(idx, idx.keys() if len(only_z)==0 else only_z) - if len(only_z) > 0: - print(f"Selecting atom-types in {only_z}") - zinmols = [] - for mol in mols: - zinmol = [sum(z == np.array(mol.elements)) for z in only_z] - zinmols.append(sum(zinmol)) - natm = max(zinmols) - else: - natm = max([mol.natm for mol in mols]) - zinmols = [mol.natm for mol in mols] allvec = np.zeros((len(omods), len(mols), natm, maxlen)) for imol, (mol, dm) in enumerate(zip(mols, dms, strict=True)): if printlevel>0: print('mol', imol, flush=True) - if len(only_z) >0: + if len(only_z)>0: only_i = [i for i,z in enumerate(mol.elements) if z in only_z] else: only_i = range(mol.natm) for iomod, omod in enumerate(omods): DM = utils.dm_open_mod(dm, omod) - vec = None # This too !!! 
(maybe a wrapper or dict) if rep_type == 'bond': vec = dmbb.repr_for_mol(mol, DM, qqs, M, mybasis, idx, maxlen, cutoff, only_z=only_z) elif rep_type == 'atom': c_df = df_wrapper(mol, DM, auxbasis, only_i=only_i) - vec = sym_wrapper(c_df, mol, idx, ao, ao_len, M, elements) + vec = sym_wrapper(maxlen, c_df, mol.elements, idx, ao, ao_len, M, only_i) allvec[iomod,imol,:len(vec)] = vec - return allvec + def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, pairfile=None, dump_and_exit=False, same_basis=True, bpath=defaults.bpath, cutoff=defaults.cutoff, omods=defaults.omod, elements=None, only_m0=False, zeros=False, split=False, printlevel=0, auxbasis=defaults.auxbasis, model=defaults.model, with_symbols=False, only_z=None, merge=True): - """ Computes and reshapes an array of SPAHM(a,b) representations + """Compute and reshape SPAHM(a) or SPAHM(b) representations with flexible output formats. + + High-level interface that handles density matrix computation, representation generation, + and output formatting including splitting, symbol labeling, and merging options. 
Args: - - rep_type (str) : the representation type ('atom' or 'bond' centered) - - mols (list): the list of molecules (pyscf.Mole objects) - - xyzlist (list of str): list with the paths to the xyz files - - guess (str): the guess Hamiltonian - - xc (str): the exchange-correlation functionals - - dms (list of numpy.ndarray): list of guess density matrices for each molecule - - readdm (str): path to the .npy file containins density matrices - - bpath (str): path to the directory containing bond-optimized basis-functions (.bas) - - cutoff (float): the cutoff distance (angstrom) between atoms to be considered as bond - - omods (list of str): the selected mode for open-shell computations - - spin (list of int): list of spins for each molecule - - elements (list of str): list of all elements present in the set of molecules - - only_m0 (bool): use only basis functions with `m=0` - - zeros (bool): add zeros features for non-existing bond pairs - - printlevel (int): level of verbosity - - pairfile (str): path to the pairfile (if None, atom pairs are detected automatically) - - dump_and_exit (bool): to save pairfile for the set of molecules (without generating representaitons) - - same_basis (bool): to use the same bond-optimized basis function for all atomic pairs (ZZ.bas == CC.bas for any Z) - - only_z (list of str): restrict the atomic representations to atom types in this list - - split (bool): to split the final array into molecules - - with_symbols (bool): to associate atomic symbol to representations in final array - - merge (bool): to concatenate alpha and beta representations to a single feature vector + rep_type (str): Representation type ('atom' or 'bond'). + mols (list): List of pyscf Mole objects. + xyzlist (list): List of XYZ file paths corresponding to mols. + guess (str): Guess Hamiltonian name. + xc (str): Exchange-correlation functional. Defaults to defaults.xc. + spin (list, optional): List of spin multiplicities per molecule. Defaults to None. 
+ readdm (str, optional): Directory path to load pre-computed density matrices. Defaults to None. + pairfile (str, optional): Path to atom pair file for SPAHM(b). Defaults to None. + dump_and_exit (bool): Save atom pair file for SPAHM(b) to pairfile and exit without computing. Defaults to False. + same_basis (bool): Use generic CC.bas for all atom pairs for SPAHM(b). Defaults to True. + bpath (str): Directory path containing bond-optimized basis files (.bas) for SPAHM(b). Defaults to defaults.bpath. + cutoff (float): Bond cutoff distance in Å for SPAHM(b). Defaults to defaults.cutoff. + omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. + elements (list, optional): Element symbols in dataset. Auto-detected if None. Defaults to None. + only_m0 (bool): Use only m=0 angular momentum component for SPAHM(b). Defaults to False. + zeros (bool): Pad with zeros for non-existent bond pairs in SPAHM(b). Defaults to False. + split (bool): Split output by molecule. Defaults to False. + printlevel (int): Verbosity level. Defaults to 0. + auxbasis (str): Auxiliary basis for SPAHM(a). Defaults to defaults.auxbasis. + model (str): Atomic density fitting model for SPAHM(a). Defaults to defaults.model. + with_symbols (bool): Include atomic symbols with representations. Defaults to False. + only_z (list, optional): Restrict to specific atom types. Defaults to None. + merge (bool): Merge alpha/beta into single vector. Defaults to True. 
Returns: - A numpy.ndarray with all representations with shape (Nmods,Nmolecules,Natoms,Nfeatures) - with: - - Nmods: the alpha and beta components of the representation - - Nmolecules: the number of molecules in the set - - Natoms: the number of atoms in one molecule - - Nfeatures: the number of features (for each omod) - reshaped according to: - - if split==False: collapses Nmolecules and returns a single np.ndarray (Nmods,Natoms,Nfeatures) (where Natoms is the total number of atoms in the set of molecules) - - if merge==True: collapses the Nmods axis into the Nfeatures axis - - if with_symbols==True: returns (for each molecule (Natoms, 2) containging the atom symbols along 1st dim and one of the above arrays + numpy ndarray: Representation array with shape depending on options: + - Base: (n_omods, n_mols, max_atoms, n_features) + - If split=False: (n_omods, total_atoms, n_features) - all molecules concatenated + - If merge=True: Features concatenated, omods dimension removed + - If with_symbols=True: Object array with (symbol, vector) tuples per atom + - If split=True and with_symbols=True: List format per molecule """ if not dump_and_exit: dms = utils.mols_guess(mols, xyzlist, guess, xc=xc, spin=spin, readdm=readdm, printlevel=printlevel) @@ -143,7 +146,7 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= else: all_atoms = [mol.elements for mol in mols] - spin = np.array(spin) ## a bit dirty but couldn't find a better way to ensure Iterable type! + spin = np.array(spin) # a bit dirty but couldn't find a better way to ensure Iterable type! 
if (spin == None).all(): omods = [None] @@ -188,30 +191,39 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= ], dtype=object) else: - natm_tot = sum(len(elems) for elems in all_atoms) - allvec_new = np.empty_like(allvec, shape=(len(omods), natm_tot, maxlen)) - atm_i = 0 - for mol_i, elems in enumerate(all_atoms): - allvec_new[:, atm_i:atm_i+len(elems), :] = allvec[:, mol_i, :len(elems), :] - atm_i += len(elems) + all_atoms_list = list(itertools.chain.from_iterable(all_atoms)) + allvec_new = np.empty_like(allvec, shape=(len(omods), len(all_atoms_list), maxlen)) + for (mol_i, elems), slice_i in slice_generator([*enumerate(all_atoms)], inc=lambda x: len(x[1])): + allvec_new[:, slice_i, :] = allvec[:, mol_i, :len(elems), :] allvec = allvec_new del allvec_new - all_atoms = list(itertools.chain.from_iterable(all_atoms)) if merge: allvec = np.hstack(allvec) if with_symbols: - allvec = np.array(list(zip(all_atoms, allvec, strict=True)), dtype=object) + allvec = np.array(list(zip(all_atoms_list, allvec, strict=True)), dtype=object) else: if with_symbols: allvec = np.array([ - np.array(list(zip(all_atoms, modvec, strict=True)), dtype=object) + np.array(list(zip(all_atoms_list, modvec, strict=True)), dtype=object) for modvec in allvec ], dtype=object) return allvec + def main(args=None): + """Command-line interface for computing SPAHM representations (atom or bond centered). + + Unified CLI that supports both SPAHM(a) and SPAHM(b) computations with extensive + options for molecular datasets, splitting, and output formatting. + + Args: + args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. + + Output: + Saves representations to numpy files based on --name argument and options. 
+ """ parser = SpahmParser(description='This program computes the SPAHM(a,b) representations for a given molecular system or a list thereof', unified=True, atom=True, bond=True) parser.add_argument('--rep', dest='rep', type=str, choices=['atom', 'bond'], required=True, help='the type of representation') args = parser.parse_args(args=args) @@ -267,5 +279,6 @@ def main(args=None): else: np.save(args.name_out + mod_suffix, modvec) + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index 58e14a26..b6432627 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -1,10 +1,35 @@ +"""Functions for SPAHM(a) computation. + +Implements various models: pure, SAD-diff, occupation-corrected, Löwdin partitioning. + +Provides: + models_dict: Dictionary of available models. +""" + import numpy as np import pyscf from qstack import compound, fields from . import sym, atomic_density, lowdin +from qstack.tools import slice_generator def get_basis_info(atom_types, auxbasis): + """Gather auxiliary basis information for all atom types. + + Computes overlap matrices, basis function indices, and metric matrices + needed for atomic density fitting. + + Args: + atom_types (list): List of element symbols (e.g., ['C', 'H', 'O']). + auxbasis (str or dict): Auxiliary basis set specification. + + Returns: + tuple: (ao, ao_len, idx, M) where: + - ao (dict): Angular momentum info per element. + - ao_len (dict): Basis set size per element. + - idx (dict): Pair indices for symmetrization per element. + - M (dict): Metric matrices (2D numpy ndarray) per element. + """ ao = {} idx = {} M = {} @@ -18,28 +43,46 @@ def get_basis_info(atom_types, auxbasis): def _make_models_dict(): - def df_pure(mol, dm, auxbasis): - return fields.decomposition.decompose(mol, dm, auxbasis)[1] + """Create a dictionary of available SPAHM(a) models. + + Defines density fitting functions for each model. 
+ + Returns: + dict: Mapping model names to (density_fitting_function, symmetrization_function, maxlen_function). + """ + def df_pure(mol, dm, auxbasis, only_i): + """Pure density fitting without modifications.""" + auxmol, c = fields.decomposition.decompose(mol, dm, auxbasis) + return sym.c_split_atom(auxmol, c, only_i=only_i) - def df_sad_diff(mol, dm, auxbasis): + def df_sad_diff(mol, dm, auxbasis, only_i=None): + """Density fitting on difference from superposition of atomic densities (SAD).""" mf = pyscf.scf.RHF(mol) dm_sad = mf.init_guess_by_atom(mol) + if dm_sad.ndim==3: + dm_sad = dm_sad.sum(axis=0) dm = dm - dm_sad - return fields.decomposition.decompose(mol, dm, auxbasis)[1] + auxmol, c = fields.decomposition.decompose(mol, dm, auxbasis) + return sym.c_split_atom(auxmol, c, only_i=only_i) def df_lowdin_long(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning with block-diagonal slicing with contributions from other elements.""" return atomic_density.fit(mol, dm, auxbasis, only_i=only_i) def df_lowdin_short(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning with block-diagonal slicing.""" return atomic_density.fit(mol, dm, auxbasis, short=True, only_i=only_i) def df_lowdin_long_x(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning with contributions from other elements.""" return atomic_density.fit(mol, dm, auxbasis, w_slicing=False, only_i=only_i) def df_lowdin_short_x(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning.""" return atomic_density.fit(mol, dm, auxbasis, short=True, w_slicing=False, only_i=only_i) - def df_occup(mol, dm, auxbasis): + def df_occup(mol, dm, auxbasis, only_i=None): + """Pure density fitting with preserving atom charges.""" L = lowdin.Lowdin_split(mol, dm) diag = np.diag(L.dmL) Q = np.array([sum(diag[start:stop]) for (start, stop) in mol.aoslice_nr_by_atom()[:,2:]]) @@ -47,62 +90,167 @@ def df_occup(mol, dm, auxbasis): eri2c, eri3c = fields.decomposition.get_integrals(mol, auxmol)[1:] c0 = 
fields.decomposition.get_coeff(dm, eri2c, eri3c) c = fields.decomposition.correct_N_atomic(auxmol, Q, c0, metric=eri2c) - return c - - models_dict = {'pure' : [df_pure, coefficients_symmetrize_short ], - 'sad-diff' : [df_sad_diff, coefficients_symmetrize_short ], - 'occup' : [df_occup, coefficients_symmetrize_short ], - 'lowdin-short' : [df_lowdin_short, coefficients_symmetrize_short ], - 'lowdin-long' : [df_lowdin_long, coefficients_symmetrize_long ], - 'lowdin-short-x': [df_lowdin_short_x, coefficients_symmetrize_short ], - 'lowdin-long-x' : [df_lowdin_long_x, coefficients_symmetrize_long ], - 'mr2021' : [df_pure, coefficients_symmetrize_MR2021]} + return sym.c_split_atom(auxmol, c, only_i=only_i) + + def maxlen_long(idx, _): + return sum(len(v) for v in idx.values()) + + def maxlen_short(idx, elements): + return max(len(idx[q]) for q in elements) + + def maxlen_MR2021(idx, elements): + return max(len(np.unique(idx[q][:,0])) for q in elements) + + models_dict = {'pure' : (df_pure, coefficients_symmetrize_short , maxlen_short ), + 'sad-diff' : (df_sad_diff, coefficients_symmetrize_short , maxlen_short ), + 'occup' : (df_occup, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-short' : (df_lowdin_short, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-long' : (df_lowdin_long, coefficients_symmetrize_long , maxlen_long ), + 'lowdin-short-x': (df_lowdin_short_x, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-long-x' : (df_lowdin_long_x, coefficients_symmetrize_long , maxlen_long ), + 'mr2021' : (df_pure, coefficients_symmetrize_MR2021, maxlen_MR2021 )} return models_dict def get_model(arg): + """Return density fitting and symmetrization functions for specified model. + + Args: + arg (str): Model name. Available options: + - 'pure': Pure density fitting + - 'occup': Occupation-corrected density fitting. + - 'sad-diff': Superposition of Atomic Densities difference. + - 'lowdin-short': Short Löwdin partitioning with slicing. 
+ - 'lowdin-long': Long Löwdin partitioning with slicing. + - 'lowdin-short-x': Short Löwdin. + - 'lowdin-long-x': Long Löwdin. + - 'mr2021': Method from Margraf & Reuter 2021. + + Returns: + tuple: (density_fitting_function, symmetrization_function, maxlen_function). + - density_fitting_function (callable): Function performing density fitting. + + Args: + mol (pyscf Mole): Molecule object. + dm (numpy ndarray): Density matrix (2D). + auxbasis (str or dict): Auxiliary basis set. + only_i (list[int]): List of atom indices to use. + + Returns: + list: Density fitting coefficients per atom (1D numpy ndarrays). + + - symmetrization_function (callable): Function for symmetrizing coefficients. + + Args: + maxlen (int): Maximum feature length. + c (numpy ndarray): Density fitting coefficients (1D). + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). + idx (dict): Pair indices per element. + ao (dict): Angular momentum info per element. + ao_len (dict): Basis set sizes per element. + M (dict): Metric matrices per element (2D numpy ndarrays). + only_i (list[int]): List of atom indices to use. + + Returns: + numpy ndarray: Symmetrized atomic feature vectors. + + - maxlen_function (callable): Function computing max. feature size. + + Args: + idx (dict): Pair indices per element. + elements (list[str]): Elements for which representation is computed. + + Returns: + int: Maximum feature length. + + Raises: + RuntimeError: If model name is not recognized. + """ arg = arg.lower() if arg not in models_dict: raise RuntimeError(f'Unknown model. Available models: {list(models_dict.keys())}') return models_dict[arg] -def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): - # J. T. Margraf and K. Reuter, Nat. Commun. 12, 344 (2021). 
- v = [] - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v.append(sym.vectorize_c_MR2021(idx[q], ao[q], c[i0:i0+n])) - i0 += n +def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, _, _M, only_i): + """Symmetrize density fitting coefficients using MR2021 method. + + Reference: + J. T. Margraf, K. Reuter, + "Pure non-local machine-learned density functional theory for electron correlation", + Nat. Commun. 12, 344 (2021), doi:10.1038/s41467-020-20471-y. + + Args: + maxlen (int): Maximum feature length. + c (list): List of coefficient arrays per atom. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). + idx (dict): Pair indices per element. + ao (dict): Angular momentum info per element. + _: Unused (for interface compatibility). + _M: Unused (for interface compatibility). + only_i (list[int]): List of atom indices to use. + + Returns: + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. + """ + if only_i is not None and len(only_i)>0: + atoms = np.array(atoms)[only_i] + v = np.zeros((len(atoms), maxlen)) + for iat, (q, ci) in enumerate(zip(atoms, c, strict=True)): + vi = sym.vectorize_c_MR2021(idx[q], ao[q], ci) + v[iat,:len(vi)] = vi return v -def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): - # short lowdin / everything else - v = [] - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v.append(M[q] @ sym.vectorize_c_short(idx[q], ao[q], c[i0:i0+n])) - i0 += n - maxlen = sum([len(v) for v in idx.values()]) - v = np.array([np.pad(x, (0, maxlen-len(x)), constant_values=0) for x in v]) +def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, _, M, only_i): + """Symmetrize coefficients for each atom. + + For each atom, use contributions from the said atom. + + Args: + maxlen (int): Maximum feature length. + c (list): List of coefficient arrays per atom. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). + idx (dict): Pair indices per element. 
+ ao (dict): Angular momentum info per element. + _: Unused (for interface compatibility). + M (dict): Metric matrices per element. + only_i (list[int]): List of atom indices to use. + + Returns: + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. + """ + if only_i is not None and len(only_i)>0: + atoms = np.array(atoms)[only_i] + v = np.zeros((len(atoms), maxlen)) + for iat, (q, ci) in enumerate(zip(atoms, c, strict=True)): + v[iat,:len(idx[q])] = M[q] @ sym.vectorize_c_short(idx[q], ao[q], ci) return v -def coefficients_symmetrize_long(c_df, mol, idx, ao, ao_len, M, atom_types): - # long lowdin - vectors = [] - for c_a in c_df: - v_atom = {q: np.zeros(len(idx[q])) for q in atom_types} - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v_atom[q] += M[q] @ sym.vectorize_c_short(idx[q], ao[q], c_a[i0:i0+n]) - i0 += n - v_a = np.hstack([v_atom[q] for q in atom_types]) - vectors.append(v_a) +def coefficients_symmetrize_long(maxlen, c_df, atoms, idx, ao, ao_len, M, _): + """Symmetrize coefficients for long Löwdin models. + + For each atom, use contributions from the said atom as well as all other atoms. + + Args: + maxlen (int): Maximum feature length. + c_df (list): List of coefficient arrays per atom. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). + idx (dict): Pair indices per element. + ao (dict): Angular momentum info per element. + ao_len (dict): Basis set sizes per element. + M (dict): Metric matrices per element. + _: Unused (for interface compatibility). + + Returns: + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. 
+ """ + vectors = np.zeros((len(c_df), maxlen)) + feature_slice = dict(slice_generator(idx.keys(), inc=lambda q: len(idx[q]))) + for iat, c_a in enumerate(c_df): + for q, ao_slice in slice_generator(atoms, inc=lambda q: ao_len[q]): + vectors[iat,feature_slice[q]] += M[q] @ sym.vectorize_c_short(idx[q], ao[q], c_a[ao_slice]) return vectors diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index 33e17c77..4f26b635 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -1,3 +1,5 @@ +"""Functions for SPAHM(b) computation.""" + import operator from ast import literal_eval import numpy as np @@ -8,10 +10,32 @@ def make_bname(q0, q1): + """Create canonical bond name from two element symbols. + + Args: + q0 (str): First element symbol. + q1 (str): Second element symbol. + + Returns: + str: Concatenated element symbols in alphabetical order (e.g., 'CH', 'CC', 'NO'). + """ return operator.concat(*sorted((q0, q1))) def get_basis_info(qqs, mybasis, only_m0, printlevel): + """Compute basis indices and metric matrices for bond pairs. + + Args: + qqs (list): List of bond pair names (e.g., ['CC', 'CH', 'OH']). + mybasis (dict): Dictionary mapping bond names to basis set dictionaries. + only_m0 (bool): If True, use only m=0 angular momentum components. + printlevel (int): Verbosity level. + + Returns: + tuple: (idx, M) where: + - idx (dict): Pair indices for each bond type (list of [i, j] pairs) + - M (dict): Metric matrices for each bond type (numpy 2D ndarray) + """ idx = {} M = {} for qq in qqs: @@ -27,6 +51,16 @@ def get_basis_info(qqs, mybasis, only_m0, printlevel): def read_df_basis(bnames, bpath, same_basis=False): + """Load bond-optimized basis sets from .bas files. + + Args: + bnames (list): List of bond pair names (e.g., ['CC', 'CH']). + bpath (str): Directory path containing .bas files. + same_basis (bool): If True, uses generic CC.bas for all pairs. Defaults to False. 
+ + Returns: + dict: Dictionary mapping bond names to basis set dictionaries. + """ mybasis = {} for bname in bnames: if bname in mybasis: @@ -38,6 +72,18 @@ def read_df_basis(bnames, bpath, same_basis=False): def get_element_pairs(elements): + """Generate all possible element pair combinations. + + Creates complete list of bond types assuming all elements can bond with each other. + + Args: + elements (list): List of element symbols. + + Returns: + tuple: (qqs, qqs4q) where: + - qqs (list): Sorted list of unique bond pair names + - qqs4q (dict): Maps each element to its list of possible bond partners + """ qqs = [] qqs4q = {} for q1 in elements: @@ -52,6 +98,23 @@ def get_element_pairs(elements): def get_element_pairs_cutoff(elements, mols, cutoff, align=False): + """Determine element pairs based on actual distances in molecules. + + Identifies which element pairs actually form bonds within the distance cutoff + by scanning molecular geometries. + + Args: + elements (list): List of element symbols to consider. + mols (list): List of pyscf Mole objects. + cutoff (float): Maximum bond distance in Å. + align (bool): If True, includes all element pairs regardless of distance. + Defaults to False. + + Returns: + tuple: (qqs, qqs4q) where: + - qqs (list): Sorted list of bond pair names found within cutoff + - qqs4q (dict): Maps each element to its list of bond partners + """ qqs4q = {q: [] for q in elements} qqs = [] if align: @@ -82,6 +145,25 @@ def get_element_pairs_cutoff(elements, mols, cutoff, align=False): def read_basis_wrapper_pairs(mols, bondidx, bpath, only_m0, printlevel, same_basis=False): + """Read basis sets and computes metric matrices for specified bond pairs. + + Processes bond pairs from molecular structures and loads their corresponding + basis sets from disk, then computes basis indices and metric matrices. + + Args: + mols (list): List of pyscf Mole objects. + bondidx (list): List of bond index pairs [(i0, i1), ...] for each molecule. 
+ bpath (str): Directory path containing basis set files. + only_m0 (bool): If True, use only m=0 angular momentum components. + printlevel (int): Verbosity level for output (>1 for detailed printing). + same_basis (bool): If True, uses generic CC.bas for all pairs. Defaults to False. + + Returns: + tuple: (mybasis, idx, M) where: + - mybasis (dict): Bond name to basis set dictionary mapping, + - idx (dict): Pair indices (list of [i, j] pairs) for each bond type, + - M (dict): Metric matrices (2D numpy ndarray) for each bond type. + """ qqs0 = [make_bname(*map(mol.atom_symbol, bondij)) for (bondij, mol) in zip(bondidx, mols, strict=True)] qqs0 = sorted(set(qqs0)) if printlevel>1: @@ -92,6 +174,34 @@ def read_basis_wrapper_pairs(mols, bondidx, bpath, only_m0, printlevel, same_bas def read_basis_wrapper(mols, bpath, only_m0, printlevel, cutoff=None, elements=None, pairfile=None, dump_and_exit=False, same_basis=False): + """Read basis sets for all element pairs present in molecules. + + Determines which element pairs exist (either all possible or within cutoff distance), + loads corresponding basis sets, and computes metric matrices and indices. + Can cache pair information to file for subsequent runs. + + Args: + mols (list): List of pyscf Mole objects to analyze. + bpath (str): Directory path containing basis set files. + only_m0 (bool): If True, use only m=0 angular momentum components. + printlevel (int): Verbosity level for output (>1 for detailed printing). + cutoff (float, optional): Maximum bond distance in Å for pair detection. + If None, considers all element combinations. Defaults to None. + elements (list, optional): List of element symbols to consider. If None, + extracts all elements from molecules. Defaults to None. + pairfile (str, optional): Path to save/load element pair information. Defaults to None. + dump_and_exit (bool): If True, saves pair information and exits. Defaults to False. + same_basis (bool): If True, uses generic CC.bas for all pairs. 
Defaults to False. + + Returns: + tuple: (elements, mybasis, qqs, qqs4q, idx, M) where: + - elements (list): Sorted list of element symbols. + - mybasis (dict): Bond name to basis set dictionary mapping. + - qqs (dict): Maps each element to list of all bond pair names. + - qqs4q (dict): Maps each element to its specific bond partners. + - idx (dict): Pair indices (list of [i, j] pairs) for each bond type. + - M (dict): Metric matrices (2D numpy ndarray) for each bond type. + """ if elements is None: elements = sorted({q for mol in mols for q in mol.elements}) @@ -114,7 +224,22 @@ def read_basis_wrapper(mols, bpath, only_m0, printlevel, cutoff=None, elements=N idx, M = get_basis_info(qqs0, mybasis, only_m0, printlevel) return elements, mybasis, qqs, qqs4q, idx, M + def bonds_dict_init(qqs, M): + """Initialize storage for bond representations. + + Creates a dictionary with zero-initialized arrays for each bond type, + with array sizes matching the corresponding metric matrix dimensions. + + Args: + qqs (list): List of bond pair names (e.g., ['CC', 'CH', 'OH']). + M (dict): Dictionary mapping bond names to metric matrices. + + Returns: + tuple: (mybonds, N) where: + - mybonds (dict): Bond name to zero-initialized numpy array mapping. + - N (int): Total number of basis functions across all bond types. + """ N = 0 mybonds = {} for qq in qqs: @@ -125,6 +250,21 @@ def bonds_dict_init(qqs, M): def fit_dm(dm, mol, mybasis, ri0, ri1): + """Fit density matrix using auxiliary basis functions at bond center. + + Decomposes the bond density matrix into auxiliary basis coefficients + centered at the bond midpoint, then splits coefficients by angular momentum. + + Args: + dm (numpy.ndarray): Density matrix for the bond. + mol (pyscf.gto.Mole): Molecule object containing the bond. + mybasis (dict): Basis set dictionary for the bond type. + ri0 (numpy.ndarray): Coordinates of first atom in Å. + ri1 (numpy.ndarray): Coordinates of second atom in Å. 
+ + Returns: + list: Coefficients split by angular momentum quantum number [(l, coeff), ...]. + """ rm = (ri0+ri1)*0.5 atom = f"No {rm[0]} {rm[1]} {rm[2]}" auxmol = gto.M(atom=atom, basis=mybasis) @@ -135,6 +275,21 @@ def fit_dm(dm, mol, mybasis, ri0, ri1): def vec_from_cs(z, cs, lmax, idx): + """Rotate basis coefficients to bond axis and creates vectorized representation. + + Applies Wigner D-matrix rotation to align coefficients with the bond vector + (same as pretending the bond is along the z-axis), + ensuring rotational invariance, then vectorizes using symmetry indices. + + Args: + z (numpy.ndarray): Bond vector (displacement from one atom to another). + cs (list): Angular momentum decomposed coefficients [(l, coeff), ...]. + lmax (int): Maximum angular momentum quantum number. + idx (dict): Pair indices mapping for symmetrization. + + Returns: + numpy.ndarray: Rotationally invariant vectorized representation of the bond. + """ D = Dmatrix_for_z(z, lmax) c_new = rotate_c(D, cs) v = sym.vectorize_c(idx, c_new) @@ -142,6 +297,28 @@ def vec_from_cs(z, cs, lmax, idx): def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): + """Compute bond representation for a specific atom pair. + + Extracts bond density, fits it with basis functions at the bond center, + and symmetrizes the representation from both atom perspectives. + + Args: + i0 (int): Index of first atom. + i1 (int): Index of second atom. + L (Lowdin_split): Löwdin-split density matrix object. + mybasis (dict): Bond basis sets keyed by bond names. + idx (dict): Pair indices for symmetrization. + q (list): Element symbols for all atoms. + r (numpy ndarray): Atomic coordinates in Å. + cutoff (float): Maximum bond distance. + + Returns: + tuple: ([v0, v1], bname) where: + - v0: Representation from atom i0's perspective. + - v1: Representation from atom i1's perspective. + - bname: Bond name (e.g., 'CH'). + Returns (None, None) if distance exceeds cutoff. 
+ """ q0, q1 = q[i0], q[i1] r0, r1 = r[i0], r[i1] z = r1-r0 @@ -150,14 +327,34 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): dm1 = L.get_bond(i0, i1) bname = make_bname(q0, q1) cs = fit_dm(dm1, L.mol, mybasis[bname], r0, r1) - lmax = max([c[0] for c in cs]) + lmax = max(c[0] for c in cs) v0 = vec_from_cs(+z, cs, lmax, idx[bname]) v1 = vec_from_cs(-z, cs, lmax, idx[bname]) return [v0, v1], bname def repr_for_mol(mol, dm, qqs, M, mybasis, idx, maxlen, cutoff, only_z=None): + """Compute SPAHM(b) representations for all atoms in a molecule. + + Constructs bond-based atomic representations by summing contributions from + all bonds of the same type within cutoff distance. + + Args: + mol (pyscf.gto.Mole): Molecule object. + dm (numpy.ndarray): Molecular density matrix. + qqs (dict): Maps each element to list of bond pair names it can form. + M (dict): Metric matrices for each bond type. + mybasis (dict): Bond basis sets keyed by bond names. + idx (dict): Pair indices for symmetrization of each bond type. + maxlen (int): Maximum representation length for zero-padding. + cutoff (float): Maximum bond distance in Å to consider. + only_z (list, optional): If provided, compute representations only for atoms + with these element symbols. Defaults to None (all atoms). + Returns: + numpy.ndarray: Array of shape (n_atoms, maxlen) containing atom representations, + where each row is a zero-padded SPAHM(b) vector. + """ if only_z is None: only_z = [] diff --git a/qstack/spahm/rho/lowdin.py b/qstack/spahm/rho/lowdin.py index 8e9b1800..23b4ff18 100644 --- a/qstack/spahm/rho/lowdin.py +++ b/qstack/spahm/rho/lowdin.py @@ -1,30 +1,71 @@ +"""Löwdin orthogonalization for density matrix partitioning.""" + import numpy as np + class Lowdin_split: + """Löwdin orthogonalization for density matrix partitioning. + + Transforms density matrix to orthogonal basis using symmetric orthogonalization, + enabling clean atomic and bond partitioning of electron density. 
+ + Attributes: + S (numpy ndarray): Overlap matrix in AO basis. + S12 (numpy ndarray): Square root of overlap matrix (S^{1/2}). + S12i (numpy ndarray): Inverse square root of overlap matrix (S^{-1/2}). + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): Original density matrix in AO basis. + dmL (numpy ndarray): Löwdin-orthogonalized density matrix. + """ + def __init__(self, mol, dm): + """Initialize Löwdin split with molecule and density matrix. + + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): Density matrix in AO basis. + """ + S = mol.intor_symmetric('int1e_ovlp') + S12,S12i = self.sqrtm(S) + self.S = S + self.S12 = S12 + self.S12i = S12i + self.mol = mol + self.dm = dm + self.dmL = S12 @ dm @ S12 + + def sqrtm(self, m): + """Compute matrix square root and inverse square root via eigendecomposition. + + Args: + m (numpy ndarray): Symmetric positive-definite matrix. + + Returns: + tuple: (m^{1/2}, m^{-1/2}) both symmetrized. + """ + e,b = np.linalg.eigh(m) + e = np.sqrt(e) + sm = b @ np.diag(e ) @ b.T + sm1 = b @ np.diag(1.0/e) @ b.T + return (sm+sm.T)*0.5, (sm1+sm1.T)*0.5 + + def get_bond(self, at1idx, at2idx): + """Extract bond density matrix for an atom pair. + + Isolates the density matrix components corresponding to interactions + between two atoms, transforming back to AO basis. + + Args: + at1idx (int): Index of first atom. + at2idx (int): Index of second atom. 
- def __init__(self, mol, dm): - S = mol.intor_symmetric('int1e_ovlp') - S12,S12i = self.sqrtm(S) - self.S = S - self.S12 = S12 - self.S12i = S12i - self.mol = mol - self.dm = dm - self.dmL = S12 @ dm @ S12 - - def sqrtm(self, m): - e,b = np.linalg.eigh(m) - e = np.sqrt(e) - sm = b @ np.diag(e ) @ b.T - sm1 = b @ np.diag(1.0/e) @ b.T - return (sm+sm.T)*0.5, (sm1+sm1.T)*0.5 - - def get_bond(self, at1idx, at2idx): - mo1idx = range(*self.mol.aoslice_nr_by_atom()[at1idx][2:]) - mo2idx = range(*self.mol.aoslice_nr_by_atom()[at2idx][2:]) - ix1 = np.ix_(mo1idx,mo2idx) - ix2 = np.ix_(mo2idx,mo1idx) - dmL_bond = np.zeros_like(self.dmL) - dmL_bond[ix1] = self.dmL[ix1] - dmL_bond[ix2] = self.dmL[ix2] - return self.S12i @ dmL_bond @ self.S12i + Returns: + numpy ndarray: Bond density matrix in AO basis (2D array). + """ + mo1idx = range(*self.mol.aoslice_nr_by_atom()[at1idx][2:]) + mo2idx = range(*self.mol.aoslice_nr_by_atom()[at2idx][2:]) + ix1 = np.ix_(mo1idx,mo2idx) + ix2 = np.ix_(mo2idx,mo1idx) + dmL_bond = np.zeros_like(self.dmL) + dmL_bond[ix1] = self.dmL[ix1] + dmL_bond[ix2] = self.dmL[ix2] + return self.S12i @ dmL_bond @ self.S12i diff --git a/qstack/spahm/rho/parser.py b/qstack/spahm/rho/parser.py index 4a5f740d..528d2604 100644 --- a/qstack/spahm/rho/parser.py +++ b/qstack/spahm/rho/parser.py @@ -1,10 +1,23 @@ +"""Command-line argument parser for SPAHM(a,b) main functions.""" + import argparse +from qstack.tools import FlexParser from .utils import defaults, omod_fns_dict from .dmb_rep_atom import models_dict from ..guesses import guesses_dict -class SpahmParser(argparse.ArgumentParser): +class SpahmParser(FlexParser): + """Custom argument parser for SPAHM command-line tools. + + Provides pre-configured argument sets for atomic and bond SPAHM computations. + + Args: + unified (bool): Enable unified file/list interface. Defaults to False. + atom (bool): Add atom-specific arguments (auxbasis, model). Defaults to False. 
+ bond (bool): Add bond-specific arguments (cutoff, bpath, etc.). Defaults to False. + **kwargs: Additional arguments passed to ArgumentParser. + """ def __init__(self, unified=False, atom=False, bond=False, **kwargs): super().__init__(formatter_class=argparse.ArgumentDefaultsHelpFormatter, **kwargs) parser = self @@ -37,18 +50,3 @@ def __init__(self, unified=False, atom=False, bond=False, **kwargs): parser.add_argument('--pairfile', dest='pairfile', default=None, type=str, help='path to the atom pair file') parser.add_argument('--dump_and_exit', dest='dump_and_exit', action='store_true', help='write the atom pair file and exit if --pairfile is set') parser.add_argument('--same_basis', dest='same_basis', action='store_true', help='if to use generic CC.bas basis file for all atom pairs (Default: uses pair-specific basis, if exists)') - - - def remove_argument(parser, arg): - for action in parser._actions: - opts = action.option_strings - if (opts and opts[0] == arg) or action.dest == arg: - parser._remove_action(action) - break - - for action in parser._action_groups: - for group_action in action._group_actions: - opts = group_action.option_strings - if (opts and opts[0] == arg) or group_action.dest == arg: - action._group_actions.remove(group_action) - return diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index a413bb74..a1aad895 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -1,16 +1,66 @@ +"""Symmetry operations for SPAHM(a,b) representations.""" + import numpy as np from qstack import compound from qstack.mathutils.matrix import sqrtm +from qstack.reorder import get_mrange + + +def c_split_atom(mol, c, only_i=None): + """Split coefficient vector by angular momentum quantum number for each atom. + + Organizes expansion coefficients into sublists grouped by angular momentum (l) + for each atomic basis function. + + Args: + mol (pyscf Mole): pyscf Mole object. + c (numpy ndarray): 1D array of expansion coefficients. 
+ only_i (list[int]): List of atom indices to use. + + Returns: + list: List of coefficients (numpy ndarrays) per atom. + """ + if only_i is None or len(only_i)==0: + aoslice_by_atom = mol.aoslice_by_atom()[:,2:] + else: + aoslice_by_atom = mol.aoslice_by_atom()[only_i,2:] + return [c[i0:i1] for i0, i1 in aoslice_by_atom] def idxl0(i, l, ao): - # return the index of the basis function with the same L and N but M=0 + """Return index of basis function with same L and N quantum numbers but M=0. + + Finds the m=0 component of the same angular momentum shell. + + Args: + i (int): Basis function index. + l (int): Angular momentum quantum number. + ao (dict): Angular momentum info dict with 'l' and 'm' keys. + + Returns: + int: Index of corresponding m=0 basis function. + """ if l != 1: return i - ao['m'][i]+l else: return i + [0, 2, 1][ao['m'][i]] + def get_S(q, basis): + """Compute overlap matrix and angular momentum info for an atom. + + Creates single-atom molecule and extracts basis function structure. + + Args: + q (str): Element symbol. + basis (str or dict): Basis set. + + Returns: + tuple: (S, ao, ao_start) where: + - S (numpy ndarray): Overlap matrix + - ao (dict): Angular momentum info with 'l' and 'm' lists for each AO + - ao_start (list): Starting indices for each angular momentum shell + """ mol = compound.make_atom(q, basis) S = mol.intor_symmetric('int1e_ovlp') @@ -18,27 +68,45 @@ def get_S(q, basis): ao = {'l': [], 'm': []} for l in l_per_bas: - msize = 2*l+1 - ao['l'].extend([l]*msize) - if l != 1: - ao['m'].extend(np.arange(msize)-l) - else: - ao['m'].extend([1, -1, 0]) # x, y, z + ao['l'].extend([l]*(2*l+1)) + ao['m'].extend(get_mrange(l)) return S, ao, ao_start def store_pair_indices(ao): + """Store basis function pair indices with matching L and M quantum numbers. + + Creates list of all (i,j) pairs where basis functions have identical angular momenta. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. 
+ + Returns: + numpy ndarray: [i, j] index pairs with matching (l, m). + """ idx = [] for i, [li, mi] in enumerate(zip(ao['l'], ao['m'], strict=True)): for j, [lj, mj] in enumerate(zip(ao['l'], ao['m'], strict=True)): if (li!=lj) or (mi!=mj): continue idx.append([i, j]) - return idx + return np.array(idx) def store_pair_indices_short(ao, ao_start): + """Store basis function pair indices for m=0 components only. + + Creates list of (i,j) pairs using only the first basis function (m=0) + of each angular momentum shell, for compact representation. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + ao_start (list): Starting indices for each angular momentum shell. + + Returns: + numpy ndarray: [i, j] index pairs for m=0 components with matching L. + """ idx = [] for i in ao_start: for j in ao_start: @@ -47,10 +115,25 @@ def store_pair_indices_short(ao, ao_start): if li!=lj: continue idx.append([i, j]) - return idx + return np.array(idx) def metric_matrix(q, idx, ao, S): + """Compute metric matrix for symmetrization of density fitting coefficients. + + Constructs metric matrix from overlap integrals of basis function pairs, + normalized by angular momentum degeneracy (2l+1). Returns square root + for transformation to orthonormal representation. + + Args: + q (str): Element symbol key for angular momentum info. + idx (numpy ndarray): [i, j] basis function pair indices. + ao (dict): Angular momentum info dict with nested structure ao[q]. + S (numpy ndarray): Overlap matrix. + + Returns: + numpy ndarray: Square root of metric matrix. 
+ """ N = len(idx) A = np.zeros((N,N)) for p in range(N): @@ -59,7 +142,7 @@ def metric_matrix(q, idx, ao, S): i1, j1 = idx[p1] l = ao['l'][i] l1 = ao['l'][i1] - if(l!=l1): + if l!=l1: continue A[p1,p] = A[p,p1] = 1.0/(2*l+1) \ * S[idxl0(i, l, ao[q]), idxl0(i1, l, ao[q])] \ @@ -68,6 +151,16 @@ def metric_matrix(q, idx, ao, S): def metric_matrix_short(idx, ao, S): + """Compute metric matrix for symmetrization of short-format coefficients. + + Args: + idx (numpy ndarray): [i, j] basis function pair indices. + ao (dict): Angular momentum info. + S (numpy ndarray): Overlap matrix. + + Returns: + numpy ndarray: Square root of metric matrix. + """ N = len(idx) A = np.zeros((N,N)) for p in range(N): @@ -76,13 +169,24 @@ def metric_matrix_short(idx, ao, S): i1,j1 = idx[p1] l = ao['l'][i] l1 = ao['l'][i1] - if(l!=l1): + if l!=l1: continue A[p1,p] = A[p,p1] = S[i,i1] * S[j,j1] / (2*l+1) return sqrtm(A) def vectorize_c(idx, c): + """Vectorizes density fitting coefficients by forming products. + + Creates rotationally invariant representation from coefficient products. + + Args: + idx (numpy ndarray): [i, j] basis function pair indices. + c (numpy ndarray): 1D array of coefficients. + + Returns: + numpy ndarray: 1D array of coefficient products c[i]*c[j]. + """ v = np.zeros(len(idx)) for p, (i,j) in enumerate(idx): v[p] = c[i]*c[j] @@ -90,7 +194,25 @@ def vectorize_c(idx, c): def vectorize_c_MR2021(idx_pair, ao, c): - idx = sorted(set(np.array(idx_pair)[:,0])) + """Vectorizes coefficients using MR2021 scheme. + + Reference: + J. T. Margraf, K. Reuter, + "Pure non-local machine-learned density functional theory for electron correlation", + Nat. Commun. 12, 344 (2021), doi:10.1038/s41467-020-20471-y. + + Computes simplified rotationally invariant representation by contracting coefficients + within each angular momentum shell. + + Args: + idx_pair (numpy ndarray): [i, j] basis function pair indices. + ao (dict): Angular momentum info with 'l' and 'm' keys. 
+ c (numpy ndarray): 1D array of density fitting coefficients. + + Returns: + numpy ndarray: 1D array of contracted coefficient norms per shell. + """ + idx = np.unique(idx_pair[:,0]) v = np.zeros(len(idx)) for p,i in enumerate(idx): l = ao['l'][i] @@ -100,6 +222,18 @@ def vectorize_c_MR2021(idx_pair, ao, c): def vectorize_c_short(idx, ao, c): + """Vectorizes coefficients using short format with shell-wise dot products. + + Computes representation by contracting coefficient vectors of angular momentum shells. + + Args: + idx (numpy ndarray): [i, j] basis function pair indices (shell starts). + ao (dict): Angular momentum info with 'l' and 'm' keys. + c (numpy ndarray): 1D array of density fitting coefficients. + + Returns: + numpy ndarray: 1D array of shell-pair dot products. + """ v = np.zeros(len(idx)) for p, [i,j] in enumerate(idx): l = ao['l'][i] @@ -109,16 +243,37 @@ def vectorize_c_short(idx, ao, c): def store_pair_indices_z(ao): + """Store basis function pairs with matching |m| quantum numbers. + + Creates list of all (i,j) pairs where basis functions have equal + absolute values of magnetic quantum number m. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + + Returns: + numpy ndarray: [i, j] index pairs with |m_i| = |m_j|. + """ idx = [] for i, mi in enumerate(ao['m']): for j, mj in enumerate(ao['m']): if abs(mi)!=abs(mj): continue idx.append([i,j]) - return idx + return np.array(idx) def store_pair_indices_z_only0(ao): + """Store basis function pairs restricted to m=0 components only. + + Creates list of all (i,j) pairs where both basis functions have m=0. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + + Returns: + numpy ndarray: [i, j] index pairs where both m_i = m_j = 0. 
+ """ idx = [] for i, mi in enumerate(ao['m']): if mi!=0: @@ -127,10 +282,24 @@ def store_pair_indices_z_only0(ao): if mj!=0: continue idx.append([i,j]) - return idx + return np.array(idx) def metric_matrix_z(idx, ao, S): + """Compute metric matrix for z-axis symmetric representations. + + Constructs metric matrix accounting for m and -m degeneracy. Matrix + elements are nonzero only when angular momenta match and m quantum + numbers satisfy m_i=m_j AND m_i1=m_j1, or m_i=-m_j AND m_i1=-m_j1. + + Args: + idx (numpy ndarray): [i, j] basis function pair indices. + ao (dict): Angular momentum info with 'l' and 'm' keys. + S (numpy ndarray): Overlap matrix. + + Returns: + numpy ndarray: Square root of metric matrix for z-symmetric normalization. + """ N = len(idx) A = np.zeros((N,N)) for p in range(N): diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index 39f63075..b0e7c9bb 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -1,3 +1,10 @@ +"""Utility functions for SPAHM(a,b) computation and default settings. + +Provides: + defaults: Default parameters for SPAHM(a,b) computation. + omod_fns_dict: Dictionary of density matrix modification functions for open-shell systems. +""" + import os import warnings import numpy as np @@ -7,20 +14,35 @@ from qstack.spahm import guesses from qstack import compound + defaults = SimpleNamespace( - guess='LB', - model='Lowdin-long-x', - basis='minao', - auxbasis='ccpvdzjkfit', - omod=['alpha', 'beta'], - elements=["H", "C", "N", "O", "S"], - cutoff=5.0, - xc='hf', - bpath=os.path.dirname(__file__)+'/basis_opt', - ) + guess='LB', + model='Lowdin-long-x', + basis='minao', + auxbasis='ccpvdzjkfit', + omod=['alpha', 'beta'], + elements=["H", "C", "N", "O", "S"], + cutoff=5.0, + xc='hf', + bpath=os.path.dirname(__file__)+'/basis_opt', + ) def get_chsp(fname, n): + """Load charge and spin information from file. + + Reads a file containing charge/spin values, converting 'None' strings to None objects. 
+ + Args: + fname (str or None): Path to charge/spin file. If None, returns array of Nones. + n (int): Expected number of entries in the file. + + Returns: + numpy ndarray: Array of charge/spin values (int or None) of length n. + + Raises: + RuntimeError: If file is not found or has wrong length. + """ def chsp_converter(chsp): if chsp == 'None': chsp = None @@ -31,7 +53,7 @@ def chsp_converter(chsp): return np.full(n, None, dtype=object) if os.path.isfile(fname): chsp = np.loadtxt(fname, dtype=object, converters=chsp_converter, encoding=None) - if(len(chsp)!=n): + if len(chsp)!=n: raise RuntimeError(f'Wrong length of the file {fname}') else: raise RuntimeError(f"{fname} can not be found") @@ -39,6 +61,22 @@ def chsp_converter(chsp): def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, progress=False, srcdir=None): + """Load molecules from XYZ files and create pyscf Mole objects. + + Args: + xyzlist (list): List of XYZ filenames. + charge (list or None): List of molecular charges (or None for neutral). + spin (list or None): List of spin multiplicities (or None for default). + basis (str or dict): Basis set. + printlevel (int): Verbosity level (0=silent). Defaults to 0. + units (str): Coordinate units ('ANG' or 'BOHR'). Defaults to 'ANG'. + ecp (str or dict, optional): Effective core potential. Defaults to None. + progress (bool): If True, shows progress bar. Defaults to False. + srcdir (str, optional): Source directory prepended to XYZ filenames. Defaults to None. + + Returns: + list: List of pyscf Mole objects. + """ mols = [] if spin is None: spin = [None] * len(xyzlist) @@ -59,18 +97,32 @@ def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, printlevel=0): + """Compute or load guess density matrices for a list of molecules. + + Args: + mols (list): List of pyscf Mole objects.
+ xyzlist (list): List of XYZ filenames (for naming/loading). + guess (str or callable): Guess method name or function. + xc (str): Exchange-correlation functional for guess. Defaults to defaults.xc. + spin (list or None): List of spin multiplicities. Defaults to None. + readdm (str, optional): Directory path to load pre-computed density matrices. Defaults to None. + printlevel (int): Verbosity level. Defaults to 0. + + Returns: + list: List of density matrices (2D or 3D numpy arrays). + """ dms = [] guess = guesses.get_guess(guess) if spin is None: - spin = [None] *len(xyzlist) + spin = [None]*len(xyzlist) for xyzfile, mol, sp in zip(xyzlist, mols, spin, strict=True): if printlevel>0: print(xyzfile, flush=True) if readdm is None: _e, v = spahm.get_guess_orbitals(mol, guess, xc=xc) - dm = guesses.get_dm(v, mol.nelec, mol.spin if sp is not None else None) + dm = guesses.get_dm(v, mol.nelec, mol.spin if sp is not None else None) else: - dm = np.load(readdm+'/'+os.path.basename(xyzfile)+'.npy') + dm = np.load(f'{readdm}/{os.path.basename(xyzfile)}.npy') if spin and dm.ndim==2: dm = np.array((dm/2,dm/2)) dms.append(dm) @@ -80,17 +132,55 @@ def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, pri def dm_open_mod(dm, omod): - omod_fns_dict[None] = lambda dm: dm - if omod in omod_fns_dict: - return omod_fns_dict[omod](dm) - else: - raise ValueError(f'unknown open-shell mod: must be in {list(omod_fns_dict.keys())}, None if the system is closed-shell') + """Treat density matrix according to the open-shell mode. + + Args: + dm (numpy ndarray): Density matrix (2D for closed-shell, 3D for open-shell). + omod (str or None): Open-shell mode. Options in omod_fns_dict. + + Returns: + numpy ndarray: Modified density matrix. + + Raises: + NotImplementedError: If omod is not a valid modification type. + RuntimeError: If dm is 3D but omod is None, or if dm is 2D but omod is not None.
+ """ + if omod is None: + if dm.ndim==3: + raise RuntimeError('Density matrix is open-shell (3D) but omod is None') + elif dm.ndim==2: + return dm + elif dm.ndim == 2: + raise RuntimeError('Density matrix is closed-shell (2D) but omod is not None') + if omod not in omod_fns_dict: + raise NotImplementedError(f'unknown open-shell mode: must be in {list(omod_fns_dict.keys())}, None if the system is closed-shell') + return omod_fns_dict[omod](dm) def get_xyzlist(xyzlistfile): + """Load list of paths to files. + + Args: + xyzlistfile (str): Path to the file containing list of XYZ filenames. + + Returns: + numpy ndarray: Array of XYZ filenames as strings. + """ return np.loadtxt(xyzlistfile, dtype=str, ndmin=1) + def check_data_struct(fin, local=False): + """Check the structure of a representation file. + + Args: + fin (str): Input file path. + local (bool): If True, checks for local representations. + + Returns: + tuple: (is_single (bool), is_labeled (bool)) + is_single: True if the file contains a single representation. + is_labeled: True if the representations are labeled. + """ x = np.load(fin, allow_pickle=True) if type(x.flatten()[0]) is str or type(x.flatten()[0]) is np.str_: is_labeled = True @@ -104,33 +194,36 @@ def check_data_struct(fin, local=False): is_labeled = False if not local and x.ndim == 1: is_single = True - elif x.shape[1] != 2: ## could be problematic! (if it's a set of local representations and nfeatures = 2 !! + elif x.shape[1] != 2: # could be problematic! (if it's a set of local representations and nfeatures = 2 !! is_single=True else: is_single = False return is_single, is_labeled - def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, local=True, sum_local=False, printlevel=0, progress=False, file_format=None): - ''' - A function to load representations from txt-list/npy files. 
- Args: - - f_in: the name of the input file - - from_list(bool): if the input file is a txt-file containing a list of paths to the representations - - srcdir(str) : the path prefix to be at the begining of each file in `f_in`, defaults to cwd - - with_label(bool): saves a list of tuple (filename, representation) - - local(bool): if the representations is local - - sum_local(bool): if local=True then sums the local components - - printlevel(int): level of verbosity - - progress(bool): if True shows progress-bar - - file_format(dict): (for "experienced users" only) structure of the input data, defaults to structure auto determination - Return: - np.array with shape (N,M) where N number of representations M dimmensionality of the representation - OR tuple ([N],np.array(N,M)) containing list of labels and np.array of representations - ''' + """Load representations from disk. + + Args: + f_in (str): Path to the input file. + from_list (bool): If the input file is a text file containing a list of paths to the representations. + srcdir (str): The path prefix to be at the beginning of each file in `f_in`. Defaults to current working directory. + with_labels (bool): Whether to return atom type labels along with the representations. + local (bool): If the representations are local (per-atom) or global (per-molecule). + sum_local (bool): Sums the local components into a global representation, only if local=True. + printlevel (int): Verbosity level. + progress (bool): Whether to show a progress bar. + file_format (dict): Structure of the input data, with keys=('is_labeled', 'is_single'). + Defaults to structure auto determination (for "experienced users" only). + + Returns: + np.array with shape (N_representations, N_features), or a tuple containing a list of atomic labels and said np.array. + + Raises: + RuntimeError: In case of shape mismatch.
+ """ if file_format is None: # Do not use mutable data structures for argument defaults file_format = {'is_labeled':None, 'is_single':None} if srcdir is None: @@ -173,10 +266,10 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, else: reps.extend(x) else: - if is_labeled: + if is_labeled: reps.append(x[1]) labels.extend(x[0]) - else: + else: reps.append(x) try: reps = np.array(reps, dtype=float) @@ -191,7 +284,18 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, else: return reps + def regroup_symbols(file_list, print_level=0, trim_reps=False): + """Regroups representations by atom type. + + Args: + file_list (list): List of representation files. + print_level (int): Verbosity level. Defaults to 0. + trim_reps (bool): If True, trims zeros from representations. Defaults to False. + + Returns: + dict: Dictionary with atom types as keys and lists of representations as values. + """ reps, atoms = load_reps(file_list, from_list=True, with_labels=True, local=True, printlevel=print_level) if print_level > 0: print(f"Extracting {len(atoms)} atoms from {file_list}:") diff --git a/qstack/tools.py b/qstack/tools.py index 50d44456..11e30bd9 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -1,265 +1,196 @@ +"""Utility functions and classes for Q-stack. + +Provides decorators, argument parsers, and helper functions for command-line tools. +""" + import os import time import resource +import argparse +import itertools import numpy as np -def _orca2gpr_idx(mol): - """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. - - Args: - mol (pyscf Mole): pyscf Mole object. +def unix_time_decorator(func): + """Measure and print execution time statistics for a function. - Returns: - A numpy ndarray of re-arranged indices. 
- """ - #def _M1(n): - # return (n+1)//2 if n%2 else -((n+1)//2) - idx = np.arange(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - for _n in range(nf): - #for m in range(-l, l+1): - # m1 = _M1(m+l) - # idx[(i+(m1-m))] = i - # i+=1 - I = np.s_[i:i+msize] - idx[I] = np.concatenate((idx[I][::-2], idx[I][1::2])) - i += msize - return idx - - -def _orca2gpr_sign(mol): - """Given a molecule returns a list of multipliers needed to tranform from orca AO. + Measures real, user, and system time for the decorated function. + Thanks to https://gist.github.com/turicas/5278558 Args: - mol (pyscf Mole): pyscf Mole object. + func (callable): Function to be decorated. Returns: - A numpy ndarray of +1/-1 multipliers + callable: Wrapped function that prints timing information. """ - signs = np.ones(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - if l<3: - i += msize*nf - else: - for _n in range(nf): - signs[i+5:i+msize] = -1 # |m| >= 3 - i+= msize - return signs - - -def _pyscf2gpr_idx(mol): - """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. + def wrapper(*args, **kwargs): + start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) + ret = func(*args, **kwargs) + end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() + real = end_time - start_time + user = end_resources.ru_utime - start_resources.ru_utime + syst = end_resources.ru_stime - start_resources.ru_stime + print(f'{func.__name__} : real: {real:.4f} user: {user:.4f} sys: {syst:.4f}') + return ret + return wrapper - Args: - mol (pyscf Mole): pyscf Mole object. - Returns: - A numpy ndarray of re-arranged indices. 
- """ +def unix_time_decorator_with_tvalues(func): + """Measure execution time statistics and return them along with function result. - idx = np.arange(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - if l==1: - for _n in range(nf): - idx[i:i+3] = [i+1,i+2,i] - i += 3 - else: - i += msize * nf - return idx - - -def reorder_ao(mol, vector, src='pyscf', dest='gpr'): - """Reorder the atomic orbitals from one convention to another. - For example, src=pyscf dest=gpr reorders p-orbitals from +1,-1,0 (pyscf convention) to -1,0,+1 (SA-GPR convention). + Measures real, user, and system time for the decorated function and returns timing dict. + Thanks to https://gist.github.com/turicas/5278558 Args: - mol (pyscf Mole): pyscf Mole object. - vector (numpy ndarray): vector or matrix - src (string): current convention - dest (string): convention to convert to (available: 'pyscf', 'gpr', ... + func (callable): Function to be decorated. Returns: - A numpy ndarray with the reordered vector or matrix. + callable: Wrapped function that returns (timing_dict, result). 
""" + def wrapper(*args, **kwargs): + start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) + ret = func(*args, **kwargs) + end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() + timing = {'real' : end_time - start_time, + 'user' : end_resources.ru_utime - start_resources.ru_utime, + 'sys' : end_resources.ru_stime - start_resources.ru_stime} + return timing, ret + return wrapper - def get_idx(mol, convention): - convention = convention.lower() - if convention == 'gpr': - return np.arange(mol.nao) - elif convention == 'pyscf': - return _pyscf2gpr_idx(mol) - elif convention == 'orca': - return _orca2gpr_idx(mol) - else: - errstr = f'Conversion to/from the {convention} convention is not implemented' - raise NotImplementedError(errstr) - - def get_sign(mol, convention): - convention = convention.lower() - if convention in ['gpr', 'pyscf']: - return np.ones(mol.nao, dtype=int) - elif convention == 'orca': - return _orca2gpr_sign(mol) - - idx_src = get_idx(mol, src) - idx_dest = get_idx(mol, dest) - sign_src = get_sign(mol, src) - sign_dest = get_sign(mol, dest) - - if vector.ndim == 2: - sign_src = np.einsum('i,j->ij', sign_src, sign_src) - sign_dest = np.einsum('i,j->ij', sign_dest, sign_dest) - idx_dest = np.ix_(idx_dest,idx_dest) - idx_src = np.ix_(idx_src,idx_src) - elif vector.ndim!=1: - errstr = f'Dim = {vector.ndim} (should be 1 or 2)' - raise ValueError(errstr) - newvector = np.zeros_like(vector) - newvector[idx_dest] = (sign_src*vector)[idx_src] - newvector *= sign_dest +def correct_num_threads(): + """Set MKL and OpenBLAS thread counts based on SLURM environment. - return newvector + If running under SLURM, sets MKL_NUM_THREADS and OPENBLAS_NUM_THREADS + to match SLURM_CPUS_PER_TASK. 
+ """ + if "SLURM_CPUS_PER_TASK" in os.environ: + os.environ["MKL_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] + os.environ["OPENBLAS_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] -def _Rz(a): - """Computes the rotation matrix around absolute z-axis. +class FlexParser(argparse.ArgumentParser): + """Argument parser that allows removing arguments. Args: - a (float): Rotation angle. + **kwargs: Arguments passed to ArgumentParser. - Returns: - A 2D numpy ndarray containing the rotation matrix. """ + def remove_argument(self, arg): + """Remove an argument from the parser. - A = np.zeros((3,3)) - A[0,0] = np.cos(a) - A[0,1] = -np.sin(a) - A[0,2] = 0 - A[1,0] = np.sin(a) - A[1,1] = np.cos(a) - A[1,2] = 0 - A[2,0] = 0 - A[2,1] = 0 - A[2,2] = 1 - return A + Utility method for customizing parsers by removing unwanted arguments + from the pre-configured set. Useful when deriving specialized parsers. + Args: + arg (str): Option destination name. -def _Ry(b): - """Computes the rotation matrix around absolute y-axis. + Output: + Modifies parser in place. + """ + for action in self._actions: + opts = action.option_strings + if (opts and opts[0] == arg) or action.dest == arg: + self._remove_action(action) + break - Args: - b (float): Rotation angle. + for action in self._action_groups: + for group_action in action._group_actions: + opts = group_action.option_strings + if (opts and opts[0] == arg) or group_action.dest == arg: + action._group_actions.remove(group_action) + return - Returns: - A 2D numpy ndarray containing the rotation matrix. - """ - A = np.zeros((3,3)) - A[0,0] = np.cos(b) - A[0,1] = 0 - A[0,2] = np.sin(b) - A[1,0] = 0 - A[1,1] = 1 - A[1,2] = 0 - A[2,0] = -np.sin(b) - A[2,1] = 0 - A[2,2] = np.cos(b) - return A - -def _Rx(g): - """Computes the rotation matrix around absolute x-axis. +def slice_generator(iterable, inc=lambda x: x, i0=0): + """Generate slices for elements in an iterable based on increments. Args: - g (float): Rotation angle. 
+ iterable (iterable): Iterable of elements to generate slices for. + inc (callable: int->int): Function that computes increment size for each element. + Defaults to identity function. + i0 (int): Initial starting index. Defaults to 0. - Returns: - A 2D numpy ndarray containing the rotation matrix. + Yields: + tuple: (element, slice) pairs for each element in the iterable. """ + func = func=lambda total, elem: total+inc(elem) + starts = itertools.accumulate(iterable, func=func, initial=i0) + starts_ends = itertools.pairwise(starts) + for elem, (start, end) in zip(iterable, starts_ends, strict=True): + yield elem, np.s_[start:end] - A = np.zeros((3,3)) - A[0,0] = 1 - A[0,1] = 0 - A[0,2] = 0 - A[1,0] = 0 - A[1,1] = np.cos(g) - A[1,2] = -np.sin(g) - A[2,0] = 0 - A[2,1] = np.sin(g) - A[2,2] = np.cos(g) - return A - -def rotate_euler(a, b, g, rad=False): - """Computes the rotation matrix given Euler angles. +class Cursor: + """Cursor class to manage dynamic indexing. Args: - a (float): Alpha Euler angle. - b (float): Beta Euler angle. - g (float): Gamma Euler angle. - rad (bool) : Wheter the angles are in radians or not. - - Returns: - A 2D numpy ndarray with the rotation matrix. + action (str): Type of indexing action ('slicer' or 'ranger'). + inc (callable: int->int): Function to determine increment size. + Defaults to identity function. + i0 (int): Initial index position. Defaults to 0. + + Attributes: + i (int): Current index position. + i_prev (int): Previous index position. + cur (range or slice): Current range or slice. + inc (callable: int->int): Increment function. + + Methods: + add(di): Advances the cursor by increment and returns current range/slice. + __call__(di=None): Advances the cursor if di is not None, + returns current range/slice.
""" + def __init__(self, action='slicer', inc=lambda x: x, i0=0): + self.i = i0 + self.i_prev = None + self.inc = inc + self.cur = None + self.action = action + self.actions_dict = {'slicer': self._slicer, 'ranger': self._ranger} + + def add(self, di): + """Advance the cursor and return the current range or slice. + + Args: + di: Element to determine increment size. + + Returns: + Current range or slice after advancing. + """ + self._add(di) + self.cur = self.actions_dict[self.action]() + return self.cur + + def _ranger(self): + return range(self.i_prev, self.i) + + def _slicer(self): + return np.s_[self.i_prev:self.i] + + def _add(self, di): + self.i_prev = self.i + self.i += self.inc(di) + + def __call__(self, di=None): + """Optionally advance the cursor and return the current range or slice. + + If the argument is passed, it is used to advance the cursor. + If not, the current value is returned. + + Args: + di (optional): Element to determine increment size. + + Returns: + Current range or slice (after advancing). 
+ """ + if di is None: + return self.cur + else: + return self.add(di) - if not rad: - a = a * np.pi / 180 - b = b * np.pi / 180 - g = g * np.pi / 180 - - A = _Rz(a) - B = _Ry(b) - G = _Rx(g) - - return A@B@G - - -def unix_time_decorator(func): -# thanks to https://gist.github.com/turicas/5278558 - def wrapper(*args, **kwargs): - start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) - ret = func(*args, **kwargs) - end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() - real = end_time - start_time - user = end_resources.ru_utime - start_resources.ru_utime - syst = end_resources.ru_stime - start_resources.ru_stime - print(f'{func.__name__} : real: {real:.4f} user: {user:.4f} sys: {syst:.4f}') - return ret - return wrapper - -def unix_time_decorator_with_tvalues(func): -# thanks to https://gist.github.com/turicas/5278558 - def wrapper(*args, **kwargs): - start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) - ret = func(*args, **kwargs) - end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() - timing = {'real' : end_time - start_time, - 'user' : end_resources.ru_utime - start_resources.ru_utime, - 'sys' : end_resources.ru_stime - start_resources.ru_stime} - return timing, ret - return wrapper + def __str__(self): + return str(self.i) -def correct_num_threads(): - if "SLURM_CPUS_PER_TASK" in os.environ: - os.environ["MKL_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] - os.environ["OPENBLAS_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] + def __repr__(self): + return str(self.i) diff --git a/qstack/tree.dat b/qstack/tree.dat deleted file mode 100644 index 368ef60c..00000000 --- a/qstack/tree.dat +++ /dev/null @@ -1,55 +0,0 @@ -├── basis_opt -│   ├── basis_tools.py -│   ├── opt.py -├── compound.py -├── constants.py -├── equio.py -├── fields -│   ├── decomposition.py -│   ├── density2file.py -│   ├── dm.py -│   ├── dori.py -│   ├── excited.py -│   ├── 
hf_otpd.py -│   ├── hirshfeld.py -│   ├── moments.py -├── mathutils -│   ├── fps.py -│   ├── matrix.py -│   ├── wigner.py -│   ├── xyz_integrals_float.py -│   └── xyz_integrals_sym.py -├── orcaio.py -├── qml.py -├── qstack-qml -│   ├── qstack_qml -│   │   ├── b2r2.py -│   │   └── slatm.py -├── regression -│   ├── condition.py -│   ├── cross_validate_results.py -│   ├── final_error.py -│   ├── global_kernels.py -│   ├── hyperparameters.py -│   ├── kernel.py -│   ├── kernel_utils.py -│   ├── local_kernels.py -│   ├── oos.py -│   └── regression.py -├── spahm -│   ├── compute_spahm.py -│   ├── guesses.py -│   ├── LB2020guess.py -│   └── rho -│   ├── atomic_density.py -│   ├── atom.py -│   ├── bond.py -│   ├── bond_selected.py -│   ├── Dmatrix.py -│   ├── dmb_rep_atom.py -│   ├── dmb_rep_bond.py -│   ├── lowdin.py -│   ├── rep_completion.py -│   ├── sym.py -│   └── utils.py -└── tools.py diff --git a/ruff.toml b/ruff.toml index 14f13f8c..953699ed 100644 --- a/ruff.toml +++ b/ruff.toml @@ -41,11 +41,13 @@ docstring-code-line-length = "dynamic" [lint] + select = [ "A", "E", "F", "B", "S", "COM", "C4", "EXE", "ICN", "PIE", "PLR1714", "ARG", "PERF", "FURB", "PLE", "TRY002", "W", "UP", "RUF", "SIM", "NPY", - #"FIX", "TD", "D", # later + "D", + #"FIX", "TD", # later ] ignore = [ "E741", # ambiguous variable name @@ -57,6 +59,26 @@ ignore = [ "SIM3", # yoda condition "SIM114", # combine if branches "SIM108", # use ternary operator +"D105", # missing docstring in magic method +"D107", # missing docstring in __init__ +"D203", # blank line required before class docstring +"D204", # blank line required after class docstring +"D213", # multi-line docstring summary should start at the second line +"D413", # missing blank line after last section +] + +#preview = true + +extend-select = ["DOC"] +extend-ignore = [ +# pathlib + "FURB101", "FURB103", +# whitespaces + "E201", "E202", "E203", "E211", + "E221", "E222", "E225", "E226", "E228", + "E231", "E241", "E271", "E272", +# lambda + 
"FURB118", ] [lint.per-file-ignores] @@ -67,4 +89,15 @@ ignore = [ "tests/*" = [ "S101", # use of assert "S306", # use of mktemp +"D", "DOC", +] +"qstack/mathutils/xyz_integrals_sym.py" = ["D417"] # missing argument descriptions +"qstack/mathutils/xyz_integrals_float.py" = ["D417"] # missing argument descriptions +"qstack/reorder.py" = ["DOC502"] # raised exception is not explicitly raised +"qstack/orcaio.py" = ["DOC502"] # raised exception is not explicitly raised +"qstack/equio.py" = ["E251"] # unexpected spaces around keyword / parameter equals +"qstack/spahm/rho/dmb_rep_atom.py" = [ +"DOC201", # `return` is not documented in docstring +"DOC102", # documented parameter is not in the function's signature +"D214", # section is over-indented ] diff --git a/tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts b/tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts index ee3bd96d..da8a5bef 100644 Binary files a/tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts and b/tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts differ diff --git a/tests/data/H2O_dist.ccpvdz.dm.mts b/tests/data/H2O_dist.ccpvdz.dm.mts index 825b9152..505bb16c 100644 Binary files a/tests/data/H2O_dist.ccpvdz.dm.mts and b/tests/data/H2O_dist.ccpvdz.dm.mts differ diff --git a/tests/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts b/tests/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts index be2d1242..73f862d1 100644 Binary files a/tests/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts and b/tests/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts differ diff --git a/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy b/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy index 311909ec..75a20eb4 100644 Binary files a/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy and b/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy differ diff --git a/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy b/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy new file mode 100644 index 00000000..0360a409 Binary files /dev/null and b/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy differ diff --git a/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy 
b/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy index 39b132de..65ec869a 100644 Binary files a/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy and b/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy differ diff --git a/tests/test_c2mio.py b/tests/test_c2mio.py index f0100b21..93d41277 100755 --- a/tests/test_c2mio.py +++ b/tests/test_c2mio.py @@ -3,15 +3,16 @@ import os from qstack.c2mio import get_cell, get_mol, get_ligand + def test_c2mio(): path = os.path.dirname(os.path.realpath(__file__)) - cell = get_cell(f'{path}/data/cell2mol/YOXKUS.cif', workdir=f'{path}/data/cell2mol/') #cell = get_cell('Cell_yoxkus.cell', workdir='.') - #print(cell.moleclist) + cell = get_cell(f'{path}/data/cell2mol/YOXKUS.cif', workdir=f'{path}/data/cell2mol/') # cell = get_cell('Cell_yoxkus.cell', workdir='.') + # print(cell.moleclist) mol = get_mol(cell, mol_idx=0, ecp='def2-svp') assert mol.natm==52 cell = get_cell(f'{path}/data/cell2mol/Cell_YOXKUS.cell', workdir='.') - #print(cell.moleclist[0].ligands) + # print(cell.moleclist[0].ligands) mol_lig = get_ligand(cell, mol_idx=0, lig_idx=1) assert mol_lig.natm==47 diff --git a/tests/test_compound.py b/tests/test_compound.py index 1e3f7313..c429ad8b 100755 --- a/tests/test_compound.py +++ b/tests/test_compound.py @@ -14,6 +14,7 @@ def test_reader(): assert mol.elements == ['O', 'H', 'H'] assert np.linalg.norm(mol.atom_coords()-check_atom_coord) < 1e-8 + def test_makeauxmol(): path = os.path.dirname(os.path.realpath(__file__)) mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) @@ -24,6 +25,7 @@ def test_makeauxmol(): assert type(auxmol.elements) is type([]) assert auxmol.basis == "cc-pvtz-jkfit" + def test_rotate_molecule(): path = os.path.dirname(os.path.realpath(__file__)) mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) @@ -31,22 +33,29 @@ def test_rotate_molecule(): rotated = compound.rotate_molecule(mol, 90, 0, 0) assert np.linalg.norm(rotated_mol.atom_coords()-rotated.atom_coords()) < 1e-10 + def 
test_mol_to_xyz(): path = os.path.dirname(os.path.realpath(__file__)) - mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) - compound.mol_to_xyz(mol, path+'/data/H2O_saved.xyz') + molpath = path+'/data/H2O_saved.xyz' + with open(molpath) as f: + xyz0 = f.read().strip() + mol = compound.xyz_to_mol(molpath, 'def2svp', charge=0, spin=0) + xyz = compound.mol_to_xyz(mol, '/dev/null') + assert np.all(xyz == xyz0) + def test_commentline(): path = os.path.dirname(os.path.realpath(__file__)) names = ["HO_json.xyz", "HO_keyvalcomma.xyz", "HO_keyvalspace.xyz", "HO_spinline.xyz"] for name in names: - print(name) mol = compound.xyz_to_mol(os.path.join(path,'data',name), 'def2svp', parse_comment=True) assert mol.spin == 0 assert mol.charge == -1 + if __name__ == '__main__': test_reader() test_makeauxmol() test_rotate_molecule() test_mol_to_xyz() + test_commentline() diff --git a/tests/test_dori.py b/tests/test_dori.py index 7350721b..4769cb46 100755 --- a/tests/test_dori.py +++ b/tests/test_dori.py @@ -85,6 +85,7 @@ def test_dori_df(): dori2, _, _, _, _ = dori(mol, c=c, grid_type='cube', resolution=0.5, alg='num') assert np.allclose(dori0, dori2) + if __name__ == '__main__': test_derivatives() test_dori_deriv() diff --git a/tests/test_equio.py b/tests/test_equio.py index 0738634a..2845eba1 100755 --- a/tests/test_equio.py +++ b/tests/test_equio.py @@ -3,6 +3,7 @@ import os import tempfile import filecmp +from itertools import starmap import numpy as np from qstack import compound, equio import metatensor @@ -27,9 +28,10 @@ def test_equio_vector(): ctensor = equio.array_to_tensormap(mol, c) tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, ctensor) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts', tmpfile)) c1 = equio.tensormap_to_array(mol, ctensor) - assert(np.linalg.norm(c-c1)==0) + assert (np.linalg.norm(c-c1)==0) + def test_equio_matrix(): 
path = os.path.dirname(os.path.realpath(__file__)) @@ -38,9 +40,10 @@ def test_equio_matrix(): dtensor = equio.array_to_tensormap(mol, dm) tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, dtensor) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.mts', tmpfile)) dm1 = equio.tensormap_to_array(mol, dtensor) - assert(np.linalg.norm(dm-dm1)==0) + assert (np.linalg.norm(dm-dm1)==0) + def test_equio_joinsplit(): path = os.path.dirname(os.path.realpath(__file__)) @@ -54,12 +57,12 @@ def test_equio_joinsplit(): tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, ctensor_big) - assert(filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts', tmpfile)) ctensors = equio.split(ctensor_big) - c11, c22 = [equio.tensormap_to_array(mol, t) for mol,t in zip([mol1,mol2], ctensors, strict=True)] - assert(np.linalg.norm(c11-c1)==0) - assert(np.linalg.norm(c22-c2)==0) + c11, c22 = [*starmap(equio.tensormap_to_array, zip([mol1, mol2], ctensors, strict=True))] + assert (np.linalg.norm(c11-c1)==0) + assert (np.linalg.norm(c22-c2)==0) if __name__ == '__main__': diff --git a/tests/test_excited.py b/tests/test_excited.py index 5fb1cade..908a33b0 100755 --- a/tests/test_excited.py +++ b/tests/test_excited.py @@ -14,8 +14,8 @@ def test_excited(): coeff = np.load(xyzfile+'.mo.npy') X = np.load(xyzfile+'.X.npy') x_c = np.load(xyzfile+'.st2_transition_fit.npy') - hole_d = np.load(xyzfile+'.st2_dm_hole.npy') - part_d = np.load(xyzfile+'.st2_dm_part.npy') + hole_d0 = np.load(xyzfile+'.st2_dm_hole.npy') + part_d0 = np.load(xyzfile+'.st2_dm_part.npy') hole_c = np.load(xyzfile+'.st2_dm_hole_fit.npy') part_c = np.load(xyzfile+'.st2_dm_part_fit.npy') @@ -23,18 +23,22 @@ def test_excited(): x_ao = fields.excited.get_transition_dm(mol, X[state_id], coeff) dip = fields.moments.first(mol, x_ao) dip0 = np.array([ 0.68927353, 
-2.10714637, -1.53423419]) - assert(np.linalg.norm(dip-dip0)<1e-8) + assert (np.allclose(dip, dip0, atol=1e-8)) + + hole_d, part_d = fields.excited.get_holepart(mol, X[state_id], coeff) + assert (np.allclose(hole_d, hole_d0, atol=1e-8)) + assert (np.allclose(part_d, part_d0, atol=1e-8)) auxmol = compound.make_auxmol(mol, 'ccpvqz jkfit') dip = fields.moments.first(auxmol, x_c) dip0 = np.array([-0.68919144, 2.10692116, 1.53399871]) - assert(np.linalg.norm(dip-dip0)<1e-8) + assert (np.allclose(dip, dip0, atol=1e-8)) dist, hole_extent, part_extent = fields.excited.exciton_properties(mol, hole_d, part_d) - assert(np.linalg.norm(np.array([dist, hole_extent, part_extent])-np.array([2.59863354, 7.84850017, 5.67617426]))<1e-7) + assert (np.allclose([dist, hole_extent, part_extent], [2.59863354, 7.84850017, 5.67617426], atol=1e-7)) dist, hole_extent, part_extent = fields.excited.exciton_properties(auxmol, hole_c, part_c) - assert(np.linalg.norm(np.array([dist, hole_extent, part_extent])-np.array([2.59940378, 7.8477511, 5.67541635]))<1e-7) + assert (np.allclose([dist, hole_extent, part_extent], [2.59940378, 7.8477511, 5.67541635], atol=1e-7)) def test_excited_frag(): @@ -52,8 +56,8 @@ def test_excited_frag(): else: omega_hole_frag0 = np.array([ 4.24698889, 25.1717958 , 7.80455406, 32.89098877, 29.88567248]) omega_part_frag0 = np.array([ 1.87258999, 19.98184387, 37.30712212, 36.77858748, 4.05985653]) - assert(np.linalg.norm(omega_hole_frag-omega_hole_frag0)<1e-8) - assert(np.linalg.norm(omega_part_frag-omega_part_frag0)<1e-8) + assert (np.linalg.norm(omega_hole_frag-omega_hole_frag0)<1e-8) + assert (np.linalg.norm(omega_part_frag-omega_part_frag0)<1e-8) if __name__ == '__main__': diff --git a/tests/test_fitting.py b/tests/test_fitting.py index 10443eff..59a7f573 100755 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -3,7 +3,7 @@ import os import numpy as np from qstack import compound -from qstack.fields import decomposition +from qstack.fields import decomposition, 
moments def test_fitting(): @@ -12,7 +12,7 @@ def test_fitting(): dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') c0 = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') _auxmol, c = decomposition.decompose(mol, dm, 'cc-pvdz jkfit') - assert(np.linalg.norm(c-c0)<1e-10) + assert (np.linalg.norm(c-c0)<1e-10) def test_block_fitting(): @@ -29,7 +29,7 @@ def test_block_fitting(): c0 = decomposition.get_coeff(dm, eri2c0, eri3c) c = decomposition.get_coeff(dm, eri2c0, eri3c, slices=atom_bounds) - assert(np.linalg.norm(c-c0)<1e-10) + assert (np.linalg.norm(c-c0)<1e-10) def test_fitting_error(): @@ -39,17 +39,19 @@ def test_fitting_error(): c0 = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') error0 = 4.876780263884939e-05 auxmol = compound.make_auxmol(mol, 'cc-pvdz jkfit') - eri2c = auxmol.intor('int2c2e_sph') + _, eri2c, eri3c = decomposition.get_integrals(mol, auxmol) self_repulsion = decomposition.get_self_repulsion(mol, dm) - error = decomposition.decomposition_error(self_repulsion, c0, eri2c) - assert(np.allclose(error, error0)) + error = decomposition.optimal_decomposition_error(self_repulsion, c0, eri2c) + assert (np.allclose(error, error0)) + error = decomposition.decomposition_error(self_repulsion, c0, eri2c, eri3c, dm) + assert (np.allclose(error, error0)) def test_fitting_noe(): path = os.path.dirname(os.path.realpath(__file__)) auxmol = compound.xyz_to_mol(path+'/data/H2O_dist.xyz', 'cc-pvdz jkfit', charge=0, spin=0) c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') - N = decomposition.number_of_electrons_deco(auxmol, c) + N = moments.r2_c(auxmol, c, moments=[0])[0] N0 = 10.000199558313856 assert np.allclose(N,N0) diff --git a/tests/test_global.py b/tests/test_global.py index 73d641eb..ef443021 100755 --- a/tests/test_global.py +++ b/tests/test_global.py @@ -12,13 +12,12 @@ def test_avg_kernel(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, akernel='L', gkernel='avg', sigma=1.0) - true_K = np.array( [[1. 
, 1. , 0.79179528], \ - [1. , 1. , 0.79179528] , \ + true_K = np.array( [[1. , 1. , 0.79179528], + [1. , 1. , 0.79179528] , [0.79179528, 0.79179528, 1. ]]) - - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) def test_rem_kernel(): @@ -28,12 +27,12 @@ def test_rem_kernel(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, akernel='L', gkernel='rem', sigma=1.0, gdict={'alpha':1.0, 'normalize':1, 'verbose':0}) - true_K = np.array( [[1. , 0.6528238, 1. ], \ - [0.6528238,1. ,0.6528238], \ + true_K = np.array( [[1. , 0.6528238, 1. ], + [0.6528238,1. ,0.6528238], [1. ,0.6528238 ,1. ]]) - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) def test_rem_kernel_not_self(): @@ -43,12 +42,12 @@ def test_rem_kernel_not_self(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, Y=np.copy(mols), akernel='L', gkernel='rem', sigma=1.0, gdict={'alpha':1.0, 'normalize':1, 'verbose':0}) - true_K = np.array( [[1. , 0.6528238, 1. ], \ - [0.6528238,1. ,0.6528238], \ + true_K = np.array( [[1. , 0.6528238, 1. ], + [0.6528238,1. ,0.6528238], [1. ,0.6528238 ,1. 
]]) - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) if __name__ == '__main__': diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 69d546f9..907f67ec 100755 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -5,19 +5,19 @@ def test_local_kernels(): - #np.random.seed(666) - #X = np.random.rand(2,4) - #Y = np.random.rand(2,4) - #K_G_good = np.zeros((len(X),len(Y))) - #K_L_good = np.zeros((len(X),len(Y))) - #for i, x in enumerate(X): + # np.random.seed(666) + # X = np.random.rand(2,4) + # Y = np.random.rand(2,4) + # K_G_good = np.zeros((len(X),len(Y))) + # K_L_good = np.zeros((len(X),len(Y))) + # for i, x in enumerate(X): # for j, y in enumerate(Y): # K_G_good[i,j] = np.dot(x-y, x-y) # K_L_good[i,j] = np.sum(abs(x-y)) - #np.exp(-K_G_good/2, out=K_G_good) - #np.exp(-K_L_good/2, out=K_L_good) - #K_dot_good = np.dot(X, Y.T) - #K_cos_good = K_dot_good / np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(Y, axis=1)) + # np.exp(-K_G_good/2, out=K_G_good) + # np.exp(-K_L_good/2, out=K_L_good) + # K_dot_good = np.dot(X, Y.T) + # K_cos_good = K_dot_good / np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(Y, axis=1)) X = np.array([[0.70043712, 0.84418664, 0.67651434, 0.72785806], [0.95145796, 0.0127032 , 0.4135877 , 0.04881279]]) Y = np.array([[0.09992856, 0.50806631, 0.20024754, 0.74415417], [0.192892 , 0.70084475, 0.29322811, 0.77447945]]) diff --git a/tests/test_molden.py b/tests/test_molden.py index 5ba58c7c..8ea95543 100755 --- a/tests/test_molden.py +++ b/tests/test_molden.py @@ -14,7 +14,7 @@ def test_molden(): c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') tmpfile = tempfile.mktemp() + '.molden' coeffs_to_molden(auxmol, c, tmpfile) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.molden', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.molden', tmpfile)) if __name__ == '__main__': diff --git 
a/tests/test_moments.py b/tests/test_moments.py new file mode 100755 index 00000000..c95c557d --- /dev/null +++ b/tests/test_moments.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import os +import numpy as np +from qstack import compound +from qstack.fields import decomposition, moments + + +def test_moments(): + path = os.path.dirname(os.path.realpath(__file__)) + mol = compound.xyz_to_mol(path+'/data/H2O_dist.xyz', 'cc-pvdz', charge=0, spin=0) + dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') + c = decomposition.decompose(mol, dm, 'cc-pvdz')[1] + + R0 = 9.930396060748974 + R0_atom = [5.6426496, 1.88412837, 2.4036181 ] + R1 = [ 1.53224245e-01, 1.70535989e-01, -8.51874261e-16] + R2 = 12.352661975356678 + + r0, r1, r2 = moments.r2_c(mol, c) + assert (np.allclose(r0, R0)) + assert (np.allclose(r1, R1)) + assert (np.allclose(r2, R2)) + + I0, I1, I2 = moments.r2_c(mol, None) + assert (np.allclose(r0, I0@c)) + assert (np.allclose(r1, I1@c)) + assert (np.allclose(r2, I2@c)) + + I0, I1, I2 = moments.r2_c(mol, None, per_atom=True) + r0_atom = c @ I0 + assert (np.allclose(r0_atom, R0_atom)) + r1_atom = np.einsum('p,xpa->ax', c, I1) # (atom, component) + assert (np.allclose(r1_atom.sum(axis=0), R1)) + + r0_atom, r1_atom, r2_atom = moments.r2_c(mol, c, per_atom=True) + assert (np.allclose(r0_atom, R0_atom)) + assert (np.allclose(r1_atom.sum(axis=0), R1)) + assert (np.allclose(r2_atom.sum(), R2)) + + +if __name__ == '__main__': + test_moments() diff --git a/tests/test_opt.py b/tests/test_opt.py index e861e700..f5f216bb 100755 --- a/tests/test_opt.py +++ b/tests/test_opt.py @@ -11,15 +11,16 @@ def test_hf_otpd(): mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) dm = fields.dm.get_converged_dm(mol, xc="pbe") - otpd, grid = fields.hf_otpd.hf_otpd(mol, dm, return_all = True) + otpd, grid = fields.hf_otpd.hf_otpd(mol, dm, return_all=True) mol_dict = {'atom': mol.atom, 'rho': otpd, 'coords': grid.coords, 'weights': grid.weights} g = 
basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt', path+'/data/initial/O_N0.txt'], [mol_dict], check=True, printlvl=0) - assert(np.all(g['diff'] < 1e-6)) + assert (np.all(g['diff'] < 1e-6)) ob_good = {'H': [[0, [42.30256758622713, 1]], [0, [6.83662718701579, 1]], [0, [1.8547192742478775, 1]], [0, [0.3797283290452742, 1]], [1, [12.961663119622536, 1]], [1, [2.507400755551906, 1]], [1, [0.6648804678758861, 1]], [2, [3.482167705165484, 1]], [2, [0.6053728887614225, 1]], [3, [0.6284190712545101, 1]]]} ob = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt'], [path+'/data/H2.ccpvtz.grid3.npz'], printlvl=2, gtol_in=1e-5) for [_l,[a,_c]], [_l1,[a1,_c1]] in zip(ob_good['H'], ob['H'], strict=True): - assert(abs(a-a1)<1e-5) + assert (abs(a-a1)<1e-5) + if __name__ == '__main__': test_hf_otpd() diff --git a/tests/test_orca.py b/tests/test_orca.py index e8330e70..99013cd9 100755 --- a/tests/test_orca.py +++ b/tests/test_orca.py @@ -6,7 +6,7 @@ import qstack.orcaio import qstack.compound import qstack.fields -from qstack.tools import reorder_ao +from qstack.reorder import reorder_ao def _dipole_moment(mol, dm): @@ -32,9 +32,9 @@ def test_orca_density_reader(): dm421 = qstack.orcaio.read_density(mol, 'H2O.orca421', directory=path+'/data/orca/', version=421, openshell=True) dm504 = qstack.orcaio.read_density(mol, 'H2O.orca504', directory=path+'/data/orca/', version=504, openshell=True) - assert(np.linalg.norm(dm-dm400)<1e-4) - assert(np.linalg.norm(dm400-dm421)<1e-10) - assert(np.linalg.norm(dm504-dm421)<5e-3) + assert (np.linalg.norm(dm-dm400)<1e-4) + assert (np.linalg.norm(dm400-dm421)<1e-10) + assert (np.linalg.norm(dm504-dm421)<5e-3) def test_orca_gbw_reader(): @@ -45,6 +45,7 @@ def test_orca_gbw_reader(): c = mf.mo_coeff e = mf.mo_energy occ = mf.mo_occ + def compare_MO(c0, c1): for s in range(c0.shape[0]): for i in range(c0.shape[-1]): diff --git a/tests/test_regression.py b/tests/test_regression.py index bfb8b989..9702fa55 100755 --- 
a/tests/test_regression.py +++ b/tests/test_regression.py @@ -23,7 +23,7 @@ def test_hyperparameters(): [5.18262767e-01,3.00473746e-01,1.00000000e-10,3.16227766e+01], [5.10592542e-01,3.38247735e-01,1.00000000e+00,3.16227766e+01]] - assert(np.allclose(hyper, true_hyper)) + assert (np.allclose(hyper, true_hyper)) def test_regression(): @@ -38,7 +38,7 @@ def test_regression(): (6, 0.24018169400891018, 0.08584295185009833), (8, 0.2708852104417901, 7.021666937153402e-17)] - assert(np.allclose(lc, true_lc)) + assert (np.allclose(lc, true_lc)) def test_regression_sparse(): @@ -52,7 +52,7 @@ def test_regression_sparse(): (4, 0.4803773474666784, 0.19356070353924582), (6, 0.333707374435793, 0.13803898307368923), (8, 0.4501685644789055, 8.95090418262362e-17)] - assert(np.allclose(lc, true_lc)) + assert (np.allclose(lc, true_lc)) def test_regression_idx(): @@ -106,6 +106,7 @@ def test_oos(): pred3 = oos.oos(X, X[idx_train], weights, sigma=3.162278e+01, random_state=666) assert np.allclose(pred3, y[idx_train]) + def test_cross_validate_results(): path = os.path.dirname(os.path.realpath(__file__)) X = np.load(os.path.join(path, 'data/mols/X_lb.npy')) @@ -116,8 +117,7 @@ def test_cross_validate_results(): (4, 0.7336549 , 0.59839317), (6, 0.7288867 , 0.50714861), (8, 0.72604955, 0.48307486)] - assert(np.allclose(lc, true_lc)) - + assert (np.allclose(lc, true_lc)) if __name__ == '__main__': diff --git a/tests/test_reorder.py b/tests/test_reorder.py index 585266b3..1076814c 100755 --- a/tests/test_reorder.py +++ b/tests/test_reorder.py @@ -2,7 +2,7 @@ import os import numpy as np -from qstack import compound, tools +from qstack import compound, reorder from qstack.mathutils.matrix import from_tril @@ -11,15 +11,15 @@ def test_reorder_pyscf_gpr(): mol = compound.xyz_to_mol(path+'/data/H2O_dist.xyz', 'cc-pvdz', charge=0, spin=0) dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') - dm1 = tools.reorder_ao(mol, dm, src='pyscf', dest='gpr') - dm2 = tools.reorder_ao(mol, dm1, src='gpr', 
dest='pyscf') - assert(np.linalg.norm(dm-dm2)==0) + dm1 = reorder.reorder_ao(mol, dm, src='pyscf', dest='gpr') + dm2 = reorder.reorder_ao(mol, dm1, src='gpr', dest='pyscf') + assert (np.linalg.norm(dm-dm2)==0) auxmol = compound.make_auxmol(mol, 'cc-pvdz jkfit') c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') - c1 = tools.reorder_ao(auxmol, c, src='pyscf', dest='gpr') - c2 = tools.reorder_ao(auxmol, c1, src='gpr', dest='pyscf') - assert(np.linalg.norm(c-c2)==0) + c1 = reorder.reorder_ao(auxmol, c, src='pyscf', dest='gpr') + c2 = reorder.reorder_ao(auxmol, c1, src='gpr', dest='pyscf') + assert (np.linalg.norm(c-c2)==0) def test_reorder_pyscf_gpr_orca(): @@ -29,20 +29,20 @@ def test_reorder_pyscf_gpr_orca(): dm_gpr = from_tril(np.load(path+'/data/reorder/2_3FOD.gpr.dm.npy')) dm_pyscf = from_tril(np.load(path+'/data/reorder/2_3FOD.pyscf.dm.npy')) - dm_gpr1 = tools.reorder_ao(mol, dm_orca, 'orca', 'gpr') - assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) - dm_gpr1 = tools.reorder_ao(mol, dm_pyscf, 'pyscf', 'gpr') - assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) + dm_gpr1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'gpr') + assert (np.linalg.norm(dm_gpr1-dm_gpr)==0) + dm_gpr1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'gpr') + assert (np.linalg.norm(dm_gpr1-dm_gpr)==0) - dm_pyscf1 = tools.reorder_ao(mol, dm_orca, 'orca', 'pyscf') - assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) - dm_pyscf1 = tools.reorder_ao(mol, dm_gpr, 'gpr', 'pyscf') - assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) + dm_pyscf1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'pyscf') + assert (np.linalg.norm(dm_pyscf1-dm_pyscf)==0) + dm_pyscf1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'pyscf') + assert (np.linalg.norm(dm_pyscf1-dm_pyscf)==0) - dm_orca1 = tools.reorder_ao(mol, dm_pyscf, 'pyscf', 'orca') - assert(np.linalg.norm(dm_orca1-dm_orca)==0) - dm_orca1 = tools.reorder_ao(mol, dm_gpr, 'gpr', 'orca') - assert(np.linalg.norm(dm_orca1-dm_orca)==0) + dm_orca1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 
'orca') + assert (np.linalg.norm(dm_orca1-dm_orca)==0) + dm_orca1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'orca') + assert (np.linalg.norm(dm_orca1-dm_orca)==0) if __name__ == '__main__': diff --git a/tests/test_rxn-repr.py b/tests/test_rxn-repr.py index d5381736..75e04c07 100755 --- a/tests/test_rxn-repr.py +++ b/tests/test_rxn-repr.py @@ -27,6 +27,7 @@ def read_mols(files): mol.set_positions(mol.positions*ase.units.Bohr) sub_mols.append(mol) return sub_mols + def get_data(): indices = np.loadtxt(idx_path, dtype=int) reactions = [] @@ -40,8 +41,12 @@ def get_data(): def test_b2r2_l(): _test_b2r2('l') + + def test_b2r2_a(): _test_b2r2('a') + + def test_b2r2_n(): _test_b2r2('n') @@ -51,7 +56,7 @@ def _test_b2r2(variant): reactions = Rxn_data(data_dir=data_dir).get_gdb7_data() b2r2_1 = b2r2.get_b2r2(reactions, variant=variant) b2r2_0 = np.load(f'{data_dir}/b2r2_{variant}.npy') - assert(np.linalg.norm(b2r2_1-b2r2_0) < 1e-10) + assert (np.linalg.norm(b2r2_1-b2r2_0) < 1e-10) def test_slatm_rxn(): @@ -59,7 +64,7 @@ def test_slatm_rxn(): reactions = Rxn_data(data_dir=data_dir).get_gdb7_data() slatm_1 = slatm.get_slatm_rxn(reactions, qml_mbtypes=True, progress=False) slatm_0 = np.load(f'{data_dir}/slatm_d.npy') - assert(np.linalg.norm(slatm_1-slatm_0) < 1e-10) + assert (np.linalg.norm(slatm_1-slatm_0) < 1e-10) if __name__ == '__main__': diff --git a/tests/test_slatm.py b/tests/test_slatm.py index c120d1cb..afa35107 100755 --- a/tests/test_slatm.py +++ b/tests/test_slatm.py @@ -11,7 +11,7 @@ def test_slatm_global(): v0 = np.load(f'{path}/data/slatm/slatm_global.npy') xyzs = sorted(glob.glob(f"{path}/data/slatm/*.xyz")) v = slatm.get_slatm_for_dataset(xyzs, progress=False, global_repr=True) - assert(np.linalg.norm(v-v0)<1e-10) + assert (np.linalg.norm(v-v0)<1e-10) def test_slatm_local(): @@ -19,7 +19,7 @@ def test_slatm_local(): v0 = np.load(f'{path}/data/slatm/slatm_local.npy') xyzs = sorted(glob.glob(f"{path}/data/slatm/*.xyz")) v = slatm.get_slatm_for_dataset(xyzs, 
progress=False) - assert(np.linalg.norm(v-v0)<1e-10) + assert (np.linalg.norm(v-v0)<1e-10) if __name__ == '__main__': diff --git a/tests/test_spahm.py b/tests/test_spahm.py index f405ca3e..2990b9b0 100755 --- a/tests/test_spahm.py +++ b/tests/test_spahm.py @@ -4,6 +4,17 @@ import numpy as np from qstack import compound from qstack.spahm import compute_spahm +from qstack.mathutils.array import vstack_padding + + +def test_spahm_GWH(): + path = os.path.dirname(os.path.realpath(__file__)) + mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'minao', charge=1, spin=1) + R = compute_spahm.get_spahm_representation(mol, 'gwh') + true_R = np.array([[-33.02835203, -8.92909895, -8.00935971, -7.51145492, -7.32962602], + [-33.02835203, -8.92909895, -8.00935971, -7.51145492, 0. ]]) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_huckel(): @@ -12,8 +23,8 @@ def test_spahm_huckel(): R = compute_spahm.get_spahm_representation(mol, 'huckel') true_R = np.array([[-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531], [-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531]]) - assert(R.shape == (2,5)) - assert(np.abs(np.sum(R-true_R)) < 1e-05) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_LB(): @@ -22,8 +33,8 @@ def test_spahm_LB(): R = compute_spahm.get_spahm_representation(mol, 'lb') true_R = np.array( [[-18.80209878, -1.28107468, -0.79949967, -0.63587071, -0.57481672], [-18.80209878, -1.28107468, -0.79949967, -0.63587071, 0. 
]]) - assert(R.shape == (2,5)) - assert(np.abs(np.sum(R-true_R)) < 1e-05) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_LB_ecp(): @@ -61,13 +72,13 @@ def test_generate_reps(): xyzlist = [os.path.join(path,s) for s in sorted(os.listdir(path)) if ".xyz" in s] mols = [compound.xyz_to_mol(f, basis='minao', charge=0, spin=0) for f in xyzlist] xmols = [compute_spahm.get_spahm_representation(mol, 'lb')[0] for mol in mols] - maxlen = max([len(x) for x in xmols]) - X = np.array([np.pad(x, pad_width=(0,maxlen-len(x)), constant_values=0) for x in xmols]) + X = vstack_padding(xmols) Xtrue = np.load(os.path.join(path, 'X_lb.npy')) - assert(np.allclose(X, Xtrue)) + assert (np.allclose(X, Xtrue)) if __name__ == '__main__': + test_spahm_GWH() test_spahm_huckel() test_spahm_LB() test_spahm_LB_ecp() diff --git a/tests/test_spahm_a.py b/tests/test_spahm_a.py index 5268f284..87142d53 100755 --- a/tests/test_spahm_a.py +++ b/tests/test_spahm_a.py @@ -7,12 +7,14 @@ PATH = os.path.dirname(os.path.realpath(__file__)) + def underlying_test(true_data_relpath, X): X_true = np.load(PATH+true_data_relpath, allow_pickle=True) - assert(X.shape == X_true.shape) + assert (X.shape == X_true.shape) for a, a_true in zip(X, X_true, strict=True): - assert(a[0] == a_true[0]) # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + assert (a[0] == a_true[0]) # atom type + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + def test_water(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) @@ -21,21 +23,24 @@ def test_water(): model='lowdin-long-x', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X) + def test_water_alternate(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) - #X = atom.get_repr(mol, ["H", "O"], None, dm=None, + # X = atom.get_repr(mol, ["H", "O"], None, dm=None, # guess='LB', model='lowdin-long-x', auxbasis='ccpvdzjkfit') X = 
atom.get_repr("atom", [mol], [PATH], 'LB', spin=[None], auxbasis='ccpvdzjkfit', with_symbols=True) underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X) + def test_water_lowdinshortx(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='lowdin-short-x', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short-x.npy', X) + def test_water_lowdinlong(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', @@ -43,40 +48,81 @@ def test_water_lowdinlong(): model='lowdin-long', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-long.npy', X) + def test_water_lowdinshort(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='lowdin-short', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! 
underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', X) + +def test_water_mr21(): + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', + elements=["H", "O"], spin=None, with_symbols=True, + model='MR2021', auxbasis='ccpvdzjkfit') + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! + underlying_test('/data/SPAHM_a_H2O/X_H2O_MR2021.npy', X) + + def test_water_SAD_guess_open_shell(): - mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=1, spin=1) ## test breaks when effective open-shell caluclation is needed + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=1, spin=1) # test breaks when effective open-shell caluclation is needed Xsad = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=[1], with_symbols=True, - xc = 'hf', model='lowdin-long-x', auxbasis='ccpvdzjkfit') + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy', Xsad) + def test_water_SAD_guess_close_shell(): - mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) ## test breaks when effective open-shell caluclation is needed + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) # test breaks when effective open-shell caluclation is needed Xsad = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=None, with_symbols=True, - xc = 'hf', model='lowdin-long-x', auxbasis='ccpvdzjkfit') + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O_SAD.npy', Xsad) + def test_water_single_element(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, - model='lowdin-long-x', 
auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O.npy', allow_pickle=True) # the next two lines deviate from the common template a = X[0] - assert(X.shape == np.array(X_true[0], ndmin=2).shape) + assert (X.shape == np.array(X_true[0], ndmin=2).shape) + for a_true in X_true: + if a[0] == a_true[0]: # atom type + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + + +def test_water_single_element_short(): + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', + elements=["H", "O"], spin=None, with_symbols=True, + model='lowdin-short', auxbasis='ccpvdzjkfit', only_z=['O']) + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! + X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', allow_pickle=True) + a = X[0] + assert (X.shape == np.array(X_true[0], ndmin=2).shape) + for a_true in X_true: + if a[0] == a_true[0]: # atom type + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + + +def test_water_single_element_SAD(): + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', + elements=["H", "O"], spin=None, with_symbols=True, + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit', only_z=['O']) + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! 
+ X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O_SAD.npy', allow_pickle=True) + a = X[0] + assert (X.shape == np.array(X_true[0], ndmin=2).shape) for a_true in X_true: if a[0] == a_true[0]: # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations if __name__ == '__main__': @@ -88,3 +134,6 @@ def test_water_single_element(): test_water_SAD_guess_close_shell() test_water_SAD_guess_open_shell() test_water_single_element() + test_water_single_element_short() + test_water_mr21() + test_water_single_element_SAD() diff --git a/tests/test_spahm_b.py b/tests/test_spahm_b.py index 552f00fe..9624ba9f 100755 --- a/tests/test_spahm_b.py +++ b/tests/test_spahm_b.py @@ -7,12 +7,14 @@ PATH = os.path.dirname(os.path.realpath(__file__)) + def underlying_test(X, truepath): true_file = PATH + truepath X_true = np.load(true_file) - assert(X_true.shape == X.shape) + assert (X_true.shape == X.shape) for Xa, Xa_true in zip(X, X_true, strict=True): - assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + assert (np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + def test_water(): xyz_in = PATH+'/data/H2O.xyz' @@ -21,45 +23,50 @@ def test_water(): underlying_test(X, '/data/H2O_spahm_b.npy_alpha_beta.npy') + def test_water_closed(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [None], [0], 'minao') X = bond.get_repr("bond", mols, [xyz_in], 'LB', spin=[None], with_symbols=False, same_basis=False) underlying_test(X, '/data/H2O_spahm_b.npy') + def test_water_O_only(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, only_z=['O']) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin 
unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main X_true = np.load(PATH+'/data/H2O_spahm_b.npy_alpha_beta.npy') X_true = X_true[0] # this line makes it incompatible with a call to underlying_test() - assert(X_true.shape == X.shape) + assert (X_true.shape == X.shape) for Xa, Xa_true in zip(X, X_true, strict=True): - assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + assert (np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + def test_water_same_basis(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, same_basis=True) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main underlying_test(X, '/data/H2O_spahm_b_CCbas.npy_alpha_beta.npy') + def test_ecp(): xyz_in = PATH+'/data/I2.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao', ecp='def2-svp') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, same_basis=True) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = 
np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main underlying_test(X, '/data/I2_spahm-b_minao-def2-svp_alpha-beta.npy') + def test_repr_shapes(): xyz_in = [PATH+'/data/H2O.xyz', PATH+'/data/HO_spinline.xyz'] mols = utils.load_mols(xyz_in, [0,-1], [0,0], 'ccpvdz') @@ -102,7 +109,7 @@ def test_from_list(): mols = utils.load_mols(xyzlist, charges, spins, 'minao', srcdir=PATH+"/data/") spahm_b = bond.get_repr("bond", mols, xyzlist, 'LB', spin=spins, same_basis=True) Xtrue = np.load(PATH+'/data/list_H2O_spahm-b_minao_LB_alpha-beta.npy') - assert(np.allclose(Xtrue, spahm_b)) + assert (np.allclose(Xtrue, spahm_b)) if __name__ == '__main__': @@ -113,4 +120,3 @@ def test_from_list(): test_ecp() test_repr_shapes() test_from_list() - diff --git a/tests/test_spahm_b_selected.py b/tests/test_spahm_b_selected.py index 329d8245..c52b8e3b 100755 --- a/tests/test_spahm_b_selected.py +++ b/tests/test_spahm_b_selected.py @@ -5,6 +5,7 @@ from qstack import compound from qstack.spahm.rho.bond_selected import get_spahm_b_selected + def test_spahm_b_selected(): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data/') fname = os.path.join(path, 'H2O.xyz') @@ -12,7 +13,7 @@ def test_spahm_b_selected(): mols = [compound.xyz_to_mol(fname, basis='minao', charge=0, spin=0)] X = get_spahm_b_selected(mols, bondij, [fname])[0][1] Xtrue = np.load(os.path.join(path, 'H2O.xyz_1_2.npy')) - assert(np.allclose(X, Xtrue)) + assert (np.allclose(X, Xtrue)) if __name__ == '__main__': diff --git a/tests/test_spahm_grad.py b/tests/test_spahm_grad.py index 4114d500..77722c5f 100755 --- a/tests/test_spahm_grad.py +++ b/tests/test_spahm_grad.py @@ -46,7 +46,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert 
(np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_grad(): @@ -60,7 +60,7 @@ def spahm_re(r, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_re, mol, guess).reshape(mol.natm*3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_ev_grad_ecp(): @@ -74,7 +74,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_ev_grad_field(): @@ -89,7 +89,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess, field=field)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_grad_field(): @@ -105,7 +105,7 @@ def spahm_re(r, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess, field=field)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_re, mol, guess).reshape(mol.natm*3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_field_grad(): @@ -119,7 +119,7 @@ def spahm_re(field, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess, field=field)[2].reshape(-1, 3) ngrad = derivatives_num(field, spahm_re, mol, guess).reshape(3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) if __name__ == '__main__': diff --git a/tests/test_splitting.py b/tests/test_splitting.py index f14fb187..909a3cc4 100755 --- a/tests/test_splitting.py +++ 
b/tests/test_splitting.py @@ -11,19 +11,22 @@ spin_list = os.path.join(path, "data", 'list_water_spins.txt') charge_list = os.path.join(path, "data", 'list_water_charges.txt') + def test_no_split(): nameout = tempfile.mktemp() sufix = "_alpha_beta.npy" rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout]) reps = np.load(nameout+sufix) - assert(reps.shape == (9,414)) + assert (reps.shape == (9,414)) + def test_split_once(): nameout = tempfile.mktemp() sufix = "_alpha_beta.npy" rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout, '--split']) - reps = np.load(nameout+sufix, allow_pickle=True) ## why is the `dtype` object ???? - assert(reps.shape == (3, 3, 414)) + reps = np.load(nameout+sufix, allow_pickle=True) # why is the `dtype` object ???? + assert (reps.shape == (3, 3, 414)) + def test_split_twice(): nameout = tempfile.mktemp() @@ -31,8 +34,8 @@ def test_split_twice(): rep_files = [nameout+"_"+os.path.basename(f).split(".")[0]+sufix for f in np.loadtxt(mol_list, dtype=str)] rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout, '--split', "--split"]) for f in rep_files: - reps = np.load(f, allow_pickle=True) ## why is the `dtype` object ???? - assert(reps.shape == (3, 414)) + reps = np.load(f, allow_pickle=True) # why is the `dtype` object ???? 
+ assert (reps.shape == (3, 414)) if __name__ == '__main__': diff --git a/tests/test_utils.py b/tests/test_utils.py index a789a75f..81085a67 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,29 +12,32 @@ def test_load_rep_from_list(): path = os.path.dirname(os.path.realpath(__file__)) paths2list = os.path.join(path, 'data/SPAHM_a_H2O/') - Xarray, symbols = ut.load_reps(paths2list+'reps_list.txt', from_list=True, \ - with_labels=True, local=True, sum_local=False, printlevel=0, progress=True, \ + Xarray, symbols = ut.load_reps(paths2list+'reps_list.txt', from_list=True, + with_labels=True, local=True, sum_local=False, printlevel=0, progress=True, srcdir=paths2list) - assert(Xarray.shape == (9,207)) - assert(len(symbols) == 9) + assert (Xarray.shape == (9,207)) + assert (len(symbols) == 9) + def test_load_reps(): path = os.path.dirname(os.path.realpath(__file__)) paths2X = os.path.join(path, 'data/SPAHM_a_H2O/X_H2O.npy') - X, symbols = ut.load_reps(paths2X, from_list=False, \ + X, symbols = ut.load_reps(paths2X, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (3,207)) - assert(len(symbols) == 3) + assert (X.shape == (3,207)) + assert (len(symbols) == 3) + -def test_load_reps_nosymbols(): #throws warning and returns empty list of symbols +def test_load_reps_nosymbols(): # throws warning and returns empty list of symbols path = os.path.dirname(os.path.realpath(__file__)) paths2X = os.path.join(path, 'data/H2O_spahm_b.npy_alpha_beta.npy') - X, symbols = ut.load_reps(paths2X, from_list=False, \ + X, symbols = ut.load_reps(paths2X, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (3,1108)) - assert(len(symbols) == 0) + assert (X.shape == (3,1108)) + assert (len(symbols) == 0) + def test_load_reps_singleatom(): path = os.path.dirname(os.path.realpath(__file__)) @@ -44,13 +47,14 @@ def test_load_reps_singleatom(): mol = 
compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X, symbols = ut.load_reps(tmpfile, from_list=False, \ + X, symbols = ut.load_reps(tmpfile, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (1,414)) - assert(len(symbols) == 1) - assert(symbols[0] == 'O') + assert (X.shape == (1,414)) + assert (len(symbols) == 1) + assert (symbols[0] == 'O') + def test_load_reps_singleatom_sum_local(): path = os.path.dirname(os.path.realpath(__file__)) @@ -60,11 +64,12 @@ def test_load_reps_singleatom_sum_local(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X = ut.load_reps(tmpfile, from_list=False, \ + X = ut.load_reps(tmpfile, from_list=False, with_labels=False, local=True, sum_local=True, printlevel=0, progress=True) - assert(X.shape == (1,414)) + assert (X.shape == (1,414)) + def test_load_reps_singleatom_sum_local2(): path = os.path.dirname(os.path.realpath(__file__)) @@ -74,30 +79,33 @@ def test_load_reps_singleatom_sum_local2(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', 
only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X = ut.load_reps(tmpfile, from_list=False, \ + X = ut.load_reps(tmpfile, from_list=False, with_labels=False, local=True, sum_local=True, printlevel=0, progress=True) - assert(X.shape == (1,414)) + assert (X.shape == (1,414)) + def test_load_mols(): path = os.path.dirname(os.path.realpath(__file__)) molslist = [os.path.join(path, 'data', m) for m in ['H2O.xyz','H2O_dist.xyz','rotated_H2O.xyz']] mols = ut.load_mols(molslist, [0]*len(molslist), [None]*len(molslist), 'minao', progress=True) - assert(len(mols) == 3) + assert (len(mols) == 3) + def test_check_data_structure(): path = os.path.dirname(os.path.realpath(__file__)) test_files = [ - {'path2file': os.path.join(path, 'data', 'H2O_spahm-e_def2svp.npy'), 'is_local':False, 'is_single':True, 'is_labeled':False}, \ - {'path2file': os.path.join(path, 'data', 'H2O_spahm_b.npy_alpha_beta.npy'), 'is_local':True, 'is_single':True, 'is_labeled':False}, \ - {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/X_H2O.npy'), 'is_local':True, 'is_single':True, 'is_labeled':True}, \ - {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/Xs_H2O_array.npy'), 'is_local':True, 'is_single':False, 'is_labeled':True} \ + {'path2file': os.path.join(path, 'data', 'H2O_spahm-e_def2svp.npy'), 'is_local':False, 'is_single':True, 'is_labeled':False}, + {'path2file': os.path.join(path, 'data', 'H2O_spahm_b.npy_alpha_beta.npy'), 'is_local':True, 'is_single':True, 'is_labeled':False}, + {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/X_H2O.npy'), 'is_local':True, 'is_single':True, 'is_labeled':True}, + {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/Xs_H2O_array.npy'), 'is_local':True, 'is_single':False, 'is_labeled':True}, ] for ft in test_files: - is_single, is_labeled = ut.check_data_struct(ft['path2file'], local = ft['is_local']) - assert(ft['is_single'] == 
is_single) - assert(ft['is_labeled'] == is_labeled) + is_single, is_labeled = ut.check_data_struct(ft['path2file'], local=ft['is_local']) + assert (ft['is_single'] == is_single) + assert (ft['is_labeled'] == is_labeled) + def test_regroup_symbols(): path = os.path.dirname(os.path.realpath(__file__)) @@ -106,19 +114,20 @@ def test_regroup_symbols(): rep_count = {"H":2, "O":1} print(regrouped_species) for z,v in regrouped_species.items(): - assert(len(v) == rep_count[z]) + assert (len(v) == rep_count[z]) + def test_regroup_symbols_and_trim(): path = os.path.dirname(os.path.realpath(__file__)) filelist = os.path.join(path, "./data/list_water_lowdin-short-padded.txt") regrouped_species = ut.regroup_symbols(filelist, trim_reps=True) - #trimedlist = os.path.join(path, "./data/list_water_lowdin-short.txt") ## this is not possible because of inhomogenous array + # trimedlist = os.path.join(path, "./data/list_water_lowdin-short.txt") ## this is not possible because of inhomogenous array X_truth = np.load(path+"/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy", allow_pickle=True) regrouped_truth = {z:[] for z in regrouped_species} for z,v in X_truth: regrouped_truth[z].append(v) for z in regrouped_species: - assert(np.allclose(regrouped_species[z], regrouped_truth[z])) + assert (np.allclose(regrouped_species[z], regrouped_truth[z])) if __name__ == '__main__':