From 7fd9f97f93c57cb90aeb654f171109938e7fe521 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Fri, 29 Nov 2024 12:43:15 +0100 Subject: [PATCH 01/23] Update pyscf version in env file --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index a60c7657..2ea08a41 100644 --- a/environment.yml +++ b/environment.yml @@ -42,7 +42,7 @@ dependencies: - pluggy==1.0.0 - py==1.11.0 - pyparsing==3.0.6 - - pyscf==2.0.1 + - pyscf==2.2.0 - pytest==6.2.5 - scipy==1.7.3 - toml==0.10.2 From 1cbc8dba47c6bf59a4bddfb4c9cd6f7dbe4433e4 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Fri, 29 Nov 2024 14:49:54 +0100 Subject: [PATCH 02/23] Rename metatensor labels for compatibility with featomic --- qstack/equio.py | 12 ++++++------ tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.npz | Bin 9008 -> 9008 bytes tests/data/H2O_dist.ccpvdz.dm.npz | Bin 40708 -> 40644 bytes .../H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.npz | Bin 15608 -> 15544 bytes 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/qstack/equio.py b/qstack/equio.py index a649e37f..411f801a 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -6,14 +6,14 @@ import numbers vector_label_names = SimpleNamespace( - tm = ['spherical_harmonics_l', 'species_center'], + tm = ['o3_lambda', 'center_type'], block_prop = ['radial_channel'], block_samp = ['atom_id'], block_comp = ['spherical_harmonics_m'] ) matrix_label_names = SimpleNamespace( - tm = ['spherical_harmonics_l1', 'spherical_harmonics_l2', 'species_center1', 'species_center2'], + tm = ['o3_lambda1', 'o3_lambda2', 'center_type1', 'center_type2'], block_prop = ['radial_channel1', 'radial_channel2'], block_samp = ['atom_id1', 'atom_id2'], block_comp = ['spherical_harmonics_m1', 'spherical_harmonics_m2'] @@ -68,10 +68,10 @@ def _get_tsize(tensor): def _labels_to_array(labels): """Represents a set of metatensor labels as an array of the labels, using custom dtypes - + Args: labels (metatensor Labels): Labels - + Returns: labels 
(numpy ndarray[ndim=1, structured dtype]): the same labels """ @@ -179,7 +179,7 @@ def tensormap_to_vector(mol, tensor): llist = _get_llist(q, mol) il = {l: 0 for l in range(max(llist)+1)} for l in llist: - block = tensor.block(spherical_harmonics_l=l, species_center=q) + block = tensor.block(o3_lambda=l, center_type=q) id_samp = block.samples.position((iat,)) id_prop = block.properties.position((il[l],)) for m in _get_mrange(l): @@ -342,7 +342,7 @@ def tensormap_to_matrix(mol, tensor): il2 = {l2: 0 for l2 in range(max(llist2)+1)} for l2 in llist2: - block = tensor.block(spherical_harmonics_l1=l1, spherical_harmonics_l2=l2, species_center1=q1, species_center2=q2) + block = tensor.block(o3_lambda1=l1, o3_lambda2=l2, center_type1=q1, center_type2=q2) id_samp = block.samples.position((iat1, iat2)) id_prop = block.properties.position((il1[l1], il2[l2])) diff --git a/tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.npz b/tests/data/H2O_dist.ccpvdz.ccpvdzjkfit.npz index ee3bd96dc679e183b7e8beb6e6308ef1926b55d0..da8a5beff7d4edb72731f8f37ab6084d0086428e 100644 GIT binary patch delta 86 zcmdnsw!uv(z?+#xgaHB+HT)iM6-^X!P|P=u&q>TpN=a1LQBb$ZG*Q>oQP5CNPR%Px kEs8IxEJ&R^LC|1wwy@ab4}z>4_nnnu0qNY_FTIc*0O2_rI{*Lx delta 84 zcmdnsw!uv(z?+#xgaHB+HD(=Ov3jDA!{lr+5t$ry9R+opOcQlY9R&^b;)2xV%+%ue iAv*xdZyGfK diff --git a/tests/data/H2O_dist.ccpvdz.dm.npz b/tests/data/H2O_dist.ccpvdz.dm.npz index 825b9152100afff14529f45587ae6d45adf55f0d..505bb16c10098b95ada5ffddd3f6bba43f0adbff 100644 GIT binary patch delta 1542 zcmY*YTWnNS6g`g?5ogL&=nOr@c7_zDPLSINwMr}8J9Op(4Kj+9M}@u^Qs{%WAOvb( zFol-BW@hndnYJ3xn5Za-@+FiiiC<0mFhm0i#Au?iKqVw51n}Mt%+&L-&)w&qz4u!C ztnItRu3Tizo&p{-n}nAw?YA2fr=C1r)5&a7>D{+$QBCbWN=(GV!ndC7cyrxq&qhiv zkOr%Lt9MsD{hJB&cp0|Cm{pE$$DaV;pkLTOVy*0bN^~JmQ z)q4LQFy#kq)fs{NsThup#l7`0xSAS+s^?7S3+YJ1^Ezf7lLyvDl^#Jwqs+!QK5dlw zH!%daV-qmyw_&U$j{9PDU%+#~ge^^iuboD4cG?`21WY*HBxoKkHOXvbdazlhpFnAL z(>-X{>}rfP=NlFn2q~-DSzftI%7o3A1b?eAd}f?-cy3@WTG|B*#NlR(8DF$Wv;x1j 
zxD7d`DYxDVq+m^}peF2SmDz(A@wv8LpM^WEF8?Y!-0Id36~GZv>EiE z!q?3qXibdrNln!KFrNzt1;4NP9uemA@StFS3s`*1XdTx36sEp}+debS`Xp+`EuWi> zCnK$GIlGpE>b7Mx7iZcO8jrheZhA|z7OXxb(P1rAGK~zi-e=}m=%u>3%Uw(+GQ$XW4l6Q5o&iE zZY+%4dJW1U8{#?yKmQ7WX;GNs+73axP}3n(1z&b3)Pji)H@%}-3raF$^(L~>Otc(X zZXC*rdjWhD1xig@@ZgIEP`#dys%e;Om!PrtWE)JM9X&7*?JC3fz zlWvvS6>yi1$a)RW1Go)Sz?Zc*CmDjomMqNphvK{LcbOrwE zb1gOmO8Hmcy}XVwzTct@w~ppAG47wv^|R5AWu z7_FY8Ri6tF1pj(HLHvej!+aaGvD18g>@IZ*`YsBOxo9B=gI$>{02x3=Jb4Hq1W1X(QsH^#<9#If&h zM%Q)^8~Z2&)DS;{Y?`PE8H|JDSW=R!E4`k*-s+tmZ^f(L12et<{|{5;>A`Q3 zAZIN_eQYd7N@5U5ib3jZ!$(HCTr)>Qsi6qq_7OK-MM|yA=f&WaT3P%N!*C{cA$);E z_#5VmE3w*Lf}zQP$Ll14Sqzo47QqrUz*8sD5!BVmoaSJ>PNp9~^)C7t>($){e|^4g zw~n#W^8KE&YI_Ezn$0L`lypDpXBewOiy?PN{Kj0IYA~Xr!A|dBxWT16WSHSKKbU}d zjS>~X+9>nv1t?b!a~wWsbc$c&7_P->5Uun%a+>mK5DiT-UBcxiMf5IWP~xM6UXIu9 zVM4DpOCnQZFi2rS7n&uWYR0$C>u40F7KOJj!I>5#DqHOI98R~mxGe#9TJpHj0$b}k zs=>Zig|^^)tBYHfVWiaveVd)`V{Kc$?q*U1|0_Wwy1jz2D+Sd5R+XBeSySd4)Nw{hWgd!X;%LRC2=5!p)kjc z0OsS?v3UJ6Yof8!p*1(brp?f~E5wbU328lc zs=!M(jYO%)IN@KVHCtU*_SM*PmM6<6_voD4LlK8!Z z;YLZcVpq1QbGI#wzphK75RAP}>cfk@iuigv!|3*Kfz-G37N&aLqIw5I(T+%ssTfs# zPM*CJpZ3Yrg}?h8;)Pud^Gb~xPmxioeTNf_o{$9hFie(ej1Bnfgrxt+6M>nLHa)s` z2PN^nm*J9ElR%_5d~`Q%_nUCO->$Z4vfnLs?P18>6Atz4fJ6n@H=uAT!^nUlu9q>K zE7OQt)j?l9=+<}ai)IRo;hdVOw1Qz>#X?LDTIn<%9df8L-!$arg;m%*WK!qD&Jzw` zVklov)zSBv50TO*@P-_`?I3;$nJ^x*({9`gxkc_F2J@jXHqS|kFM1vICuKT|@skeS g!8f9j&=6RPv|)+1qF~s~vyb5Tuyxk#T5TBr19UMHe*gdg diff --git a/tests/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.npz b/tests/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.npz index be2d1242946bcaf39d531ac23f148cdda02f76cb..73f862d1c69bbf7fc7871bbeb95c97e843bc1c33 100644 GIT binary patch delta 630 zcmYk3KWI}?6vpp;x2D7|NB8h35zCU^S*VyFI62*eX$w48uO$JHZ1Y3l- zxH>3OzoCee6v4qE+C@-Aa1})CkVQ&egd#{mii%M0Gcoks>74VO@BBFTXZ>Z}D9o@f zH)%y67p{gCnL!Ha#dkz#ETAX^Zr{|@whcIbO$xTP6tN$B$YH;~X z-+`hSVFONrnzOa;q`}!2k!NKu-li4YjB)l!CQ$p`xTS^Vb4NPE1{&H4{1Spmh2W85 z8-%F`SIlW?$A!mcmR`oTY00O=LEGRlP$PhgaW4IFqY|HzwmoRXv(nx%?8bBQdv8Z+ 
z{#UZ~A@YE?Xx0;)-ocwhj*ekJVbKv-$vl&M1Vvv<@ggaBMXvg7Ud6r0=1r(plPr$b`ipF``F25S> zFsAT4?Zegtr+4vNNcAYe{pcWGi^Zi3r@gqHIm>p(2`;EZgN2!H@mYk7iMQn>Q$uP1i`xNwxMqh!+Z06@BMi5yz{#=S(|1% z0>q10GC6BSH#Vb}1jwS@X=fVE4kaoHQBSpXZJv@UQIb-NJD$?>rvBGkOqbMPTKvKilU{ayx}6*0L6zMO;?tXV}tY9~i+C zCa|7U@kt7L4?pD;zuH5{`K3$s#rBxQm>k5sCiv~0+lo!8(^FX0EG5y4zuF`Z?dra2 z|1B5B1;5_6t>{?T2_He$=Z*WO3AUdTZ1_UBX{gY1g6-Q)u+9T8WQyPF@0!QaltXyo z2$ofX)2iDbMdvV>pHfnLaV}q>ckw81a@$$&CU_2Sv>5tzp_B)Z)+d!%5KX!77yegQq4qCL>p$I`F;wf512RP~gM_gdlT@rnW zk42Lz- zi-P}2cOgr-Y((&2LeTekIbpI^hTvExh-=Q^cv;XRsFV*gnI+hh4F_w}J Date: Fri, 31 Oct 2025 18:02:14 +0100 Subject: [PATCH 03/23] Add copilot-generated docstring --- qstack/basis_opt/basis_tools.py | 49 +++- qstack/basis_opt/opt.py | 51 ++++ qstack/c2mio.py | 46 +++ qstack/compound.py | 87 ++++-- qstack/equio.py | 110 +++++--- qstack/fields/decomposition.py | 70 +++-- qstack/fields/density2file.py | 31 ++- qstack/fields/dm.py | 13 +- qstack/fields/dori.py | 293 +++++++++----------- qstack/fields/excited.py | 19 +- qstack/fields/hf_otpd.py | 28 +- qstack/fields/hirshfeld.py | 62 +++-- qstack/fields/moments.py | 36 ++- qstack/mathutils/fps.py | 14 +- qstack/mathutils/matrix.py | 13 +- qstack/mathutils/wigner.py | 177 +++++++----- qstack/orcaio.py | 109 ++++---- qstack/qml/b2r2.py | 47 ++++ qstack/qml/slatm.py | 20 ++ qstack/regression/condition.py | 1 + qstack/regression/cross_validate_results.py | 1 + qstack/regression/final_error.py | 1 + qstack/regression/global_kernels.py | 85 ++++-- qstack/regression/hyperparameters.py | 1 + qstack/regression/kernel.py | 1 + qstack/regression/kernel_utils.py | 39 ++- qstack/regression/local_kernels.py | 48 +++- qstack/regression/oos.py | 1 + qstack/regression/parser.py | 57 ++++ qstack/regression/regression.py | 1 + qstack/spahm/LB2020guess.py | 73 +++++ qstack/spahm/compute_spahm.py | 110 +++++--- qstack/spahm/guesses.py | 156 ++++++++--- qstack/spahm/rho/Dmatrix.py | 82 +++++- qstack/spahm/rho/atom.py | 11 + 
qstack/spahm/rho/atomic_density.py | 21 ++ qstack/spahm/rho/bond.py | 11 + qstack/spahm/rho/bond_selected.py | 35 +++ qstack/spahm/rho/compute_rho_spahm.py | 122 ++++---- qstack/spahm/rho/dmb_rep_atom.py | 84 ++++++ qstack/spahm/rho/dmb_rep_bond.py | 85 ++++++ qstack/spahm/rho/lowdin.py | 39 +++ qstack/spahm/rho/parser.py | 18 ++ qstack/spahm/rho/sym.py | 57 ++++ qstack/spahm/rho/utils.py | 56 ++++ qstack/tools.py | 113 +++++--- qstack/tree.dat | 55 ---- 47 files changed, 1932 insertions(+), 707 deletions(-) delete mode 100644 qstack/tree.dat diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index c6481f76..380b2fea 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -4,15 +4,14 @@ def energy_mol(newbasis, moldata): - """Computes overlap and 2-/3-centers ERI matrices. + """Computes energy for basis optimization. Args: - mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. - auxmol (pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set. + newbasis (dict): New basis set definition. + moldata (dict): Dictionary containing molecular data including mol, rho, coords, weights, and self. Returns: - numpy ndarray: Overlap matrix, 2-centers and 3-centers ERI matrices. - + float: Energy value for the given basis. """ mol = moldata['mol' ] rho = moldata['rho' ] @@ -31,14 +30,18 @@ def energy_mol(newbasis, moldata): def gradient_mol(nexp, newbasis, moldata): - """ + """Computes energy and gradient for basis optimization. Args: - nexp(): - newbasis(): - moldata(pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set + nexp (int): Number of exponents. + newbasis (dict): New basis set definition. + moldata (dict): Dictionary containing molecular data including mol, rho, coords, weights, + self, idx, centers, and distances. Returns: + tuple: A tuple containing: + - E (float): Energy value. 
+ - dE_da (numpy.ndarray): Gradient of energy with respect to exponents. """ mol = moldata['mol' ] @@ -89,15 +92,15 @@ def gradient_mol(nexp, newbasis, moldata): def exp2basis(exponents, elements, basis): - """ + """Convert exponents array to basis set format. - Argas: - exponents(): - elements(): - basis(): + Args: + exponents (numpy.ndarray): Array of basis function exponents. + elements (list): List of element symbols. + basis (dict): Template basis set definition. Returns: - newbasis(): + dict: New basis set with updated exponents. """ i = 0 newbasis = copy.deepcopy(basis) @@ -109,6 +112,16 @@ def exp2basis(exponents, elements, basis): def cut_myelements(x, myelements, bf_bounds): + """Extract subset of array corresponding to specified elements. + + Args: + x (numpy.ndarray): Input array. + myelements (list): List of element symbols to extract. + bf_bounds (dict): Dictionary mapping elements to their index bounds. + + Returns: + numpy.ndarray: Concatenated array containing only specified elements. + """ x1 = [] for q in myelements: bounds = bf_bounds[q] @@ -118,6 +131,12 @@ def cut_myelements(x, myelements, bf_bounds): def printbasis(basis, f): + """Print basis set in JSON-like format to file. + + Args: + basis (dict): Basis set definition. + f (file): File object to write to. + """ print('{', file=f) for q, b in basis.items(): print(' "'+q+'": [', file=f) diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index f6fc7d53..90278288 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -26,6 +26,14 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= def energy(x): + """Compute total energy for given exponents. + + Args: + x (numpy.ndarray): Log of exponents. + + Returns: + float: Total energy across all molecules. 
+ """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) E = 0.0 @@ -34,6 +42,16 @@ def energy(x): return E def gradient(x): + """Compute total energy and gradient for given exponents. + + Args: + x (numpy.ndarray): Log of exponents. + + Returns: + tuple: A tuple containing: + - E (float): Total energy. + - dE_dx (numpy.ndarray): Gradient with respect to log(exponents). + """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) @@ -56,9 +74,28 @@ def gradient(x): return E, dE_dx def gradient_only(x): + """Compute only the gradient (wrapper for optimization algorithms). + + Args: + x (numpy.ndarray): Log of exponents. + + Returns: + numpy.ndarray: Gradient with respect to log(exponents). + """ return gradient(x)[1] def read_bases(basis_files): + """Read basis set definitions from files or dicts. + + Args: + basis_files (list): List of file paths (str) or basis dicts. + + Returns: + dict: Combined basis set definition. + + Raises: + RuntimeError: If multiple sets for the same element are provided. + """ basis = {} for i in basis_files: if isinstance(i, str): @@ -76,6 +113,11 @@ def read_bases(basis_files): return basis def make_bf_start(): + """Create basis function index bounds for each element. + + Returns: + dict: Dictionary mapping elements to their [start, end] indices. + """ nbf = [len(basis[q]) for q in elements] bf_bounds = {} for i, q in enumerate(elements): @@ -84,6 +126,14 @@ def make_bf_start(): return bf_bounds def make_moldata(fname): + """Create molecular data dictionary from file or dict. + + Args: + fname (str or dict): Path to .npz file or dictionary containing rho data. + + Returns: + dict: Dictionary containing mol, rho, coords, weights, self, idx, centers, and distances. 
+ """ if isinstance(fname, str): rho_data = np.load(fname) else: @@ -167,6 +217,7 @@ def make_moldata(fname): return newbasis def main(): + """Main function for basis set optimization command-line interface.""" import argparse parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.') diff --git a/qstack/c2mio.py b/qstack/c2mio.py index e8fbc0e5..41eae921 100644 --- a/qstack/c2mio.py +++ b/qstack/c2mio.py @@ -7,6 +7,17 @@ def get_cell2mol_xyz(mol): + """Extract XYZ coordinates, charge, and spin from a cell2mol molecule object. + + Args: + mol: cell2mol molecule object. + + Returns: + tuple: A tuple containing: + - xyz (str): XYZ coordinate string. + - charge (int): Total charge of the molecule. + - spin (int): Spin of the molecule (alpha electrons - beta electrons). + """ f = io.StringIO() sys.stdout, stdout = f, sys.stdout mol.print_xyz() @@ -16,6 +27,18 @@ def get_cell2mol_xyz(mol): def get_cell(fpath, workdir='.'): + """Load a unit cell from a .cell or .cif file. + + Args: + fpath (str): Path to the input file (.cell or .cif). + workdir (str): Working directory for temporary files. Defaults to '.'. + + Returns: + cell2mol.unitcell: Unit cell object. + + Raises: + NotImplementedError: If the file extension is not .cell or .cif. + """ ext = os.path.splitext(fpath)[-1] if ext=='.cell': cell = load_binary(fpath) @@ -32,12 +55,35 @@ def get_cell(fpath, workdir='.'): def get_mol(cell, mol_idx=0, basis='minao', ecp=None): + """Extract a pyscf Mole object from a cell2mol unit cell. + + Args: + cell: cell2mol unit cell object. + mol_idx (int): Index of the molecule in the cell. Defaults to 0. + basis (str or dict): Basis set. Defaults to 'minao'. + ecp (str): Effective core potential. Defaults to None. + + Returns: + pyscf.gto.Mole: pyscf Mole object containing the molecule information. 
+ """ mol = cell.moleclist[mol_idx] xyz, charge, spin = get_cell2mol_xyz(mol) return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True) def get_ligand(cell, mol_idx=0, lig_idx=0, basis='minao', ecp=None): + """Extract a ligand as a pyscf Mole object from a cell2mol unit cell. + + Args: + cell: cell2mol unit cell object. + mol_idx (int): Index of the molecule in the cell. Defaults to 0. + lig_idx (int): Index of the ligand. Defaults to 0. + basis (str or dict): Basis set. Defaults to 'minao'. + ecp (str): Effective core potential. Defaults to None. + + Returns: + pyscf.gto.Mole: pyscf Mole object containing the ligand information. + """ mol = cell.moleclist[mol_idx].ligands[lig_idx] xyz, charge, spin = get_cell2mol_xyz(mol) return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True) diff --git a/qstack/compound.py b/qstack/compound.py index 1019cc7f..f99bafb7 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -26,7 +26,14 @@ _re_float = re.compile(r'[+-]?[0-9]*?([0-9]\.|\.[0-9]|[0-9])[0-9]*?([eEdD][+-]?[0-9]+)?') def xyz_comment_line_parser(line): - """reads the 'comment' line of a XYZ file, and tries to infer its meaning""" + """Reads the 'comment' line of a XYZ file and tries to infer its meaning. + + Args: + line (str): Comment line from XYZ file. + + Returns: + dict: Dictionary containing parsed properties (charge, spin, etc.). + """ line = line.strip() if line == '': return {} @@ -86,16 +93,21 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit """Reads a molecular file in xyz format and returns a pyscf Mole object. Args: - inp (str): path of the xyz file to read / xyz fine contents if read_string==True - basis (str or dict): Basis set. - charge (int): Provide/override charge of the molecule. - spin (int): Provide/override spin of the molecule (alpha electrons - beta electrons). 
- ignore (bool): If assume molecule closed-shell an assign charge either 0 or -1 - unit (str): Provide/override units (Ang or Bohr) - ecp (str) : ECP to use + inp (str): Path of the xyz file to read, or xyz file contents if read_string==True. + basis (str or dict): Basis set. Defaults to "def2-svp". + charge (int): Provide/override charge of the molecule. Defaults to None. + spin (int): Provide/override spin of the molecule (alpha electrons - beta electrons). Defaults to None. + ignore (bool): If True, assume molecule is closed-shell and assign charge either 0 or -1. Defaults to False. + unit (str): Provide/override units (Ang or Bohr). Defaults to None. + ecp (str): ECP to use. Defaults to None. + parse_comment (bool): Whether to parse the comment line for properties. Defaults to False. + read_string (bool): Whether inp is a string containing xyz data rather than a file path. Defaults to False. Returns: - A pyscf Mole object containing the molecule information. + pyscf.gto.Mole: pyscf Mole object containing the molecule information. + + Raises: + RuntimeError: If units are not recognized or if minao basis requires ECP for heavy atoms. """ if read_string: @@ -168,11 +180,15 @@ def mol_to_xyz(mol, fout, fmt="xyz"): """Converts a pyscf Mole object into a molecular file in xyz format. Args: - pyscf Mole: pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. fout (str): Name (including path) of the xyz file to write. + fmt (str): Output format. Defaults to "xyz". Returns: - A file in xyz format containing the charge, total spin and molecular coordinates. + str: String containing the xyz formatted data. + + Raises: + NotImplementedError: If fmt is not "xyz". """ fmt = fmt.lower() @@ -202,11 +218,12 @@ def make_auxmol(mol, basis, copy_ecp=False): """Builds an auxiliary Mole object given a basis set and a pyscf Mole object. Args: - mol (pyscf Mole): Original pyscf Mole object. + mol (pyscf.gto.Mole): Original pyscf Mole object. basis (str or dict): Basis set. 
+ copy_ecp (bool): Whether to copy ECP from original molecule. Defaults to False. Returns: - An auxiliary pyscf Mole object. + pyscf.gto.Mole: Auxiliary pyscf Mole object. """ # Define attributes to the auxiliary Mole object and build it @@ -226,15 +243,14 @@ def rotate_molecule(mol, a, b, g, rad=False): """Rotate a molecule: transform nuclear coordinates given a set of Euler angles. Args: - mol (pyscf Mole): Original pyscf Mole object. + mol (pyscf.gto.Mole): Original pyscf Mole object. a (float): Alpha Euler angle. b (float): Beta Euler angle. g (float): Gamma Euler angle. - rad (bool) : Wheter the angles are in radians or not. - + rad (bool): Whether the angles are in radians. Defaults to False (degrees). Returns: - A pyscf Mole object with transformed coordinates. + pyscf.gto.Mole: pyscf Mole object with transformed coordinates. """ orig_coords = mol.atom_coords() @@ -253,13 +269,13 @@ def rotate_molecule(mol, a, b, g, rad=False): def fragments_read(frag_file): - """Loads fragement definition from a frag file. + """Loads fragment definition from a frag file. Args: frag_file (str): Name (including path) of the frag file to read. Returns: - A list of arrays containing the fragments. + list: List of numpy arrays containing the fragment indices. """ with open(frag_file) as f: fragments = [np.fromstring(line, dtype=int, sep=' ')-1 for line in f] @@ -269,12 +285,12 @@ def fragment_partitioning(fragments, prop_atom_inp, normalize=True): """Computes the contribution of each fragment. Args: - fragments (numpy ndarray): Fragment definition - prop_atom_inp (list of arrays or array): Coefficients densities. - normalize (bool): Normalized fragment partitioning. Defaults to True. + fragments (list): Fragment definition as list of numpy arrays. + prop_atom_inp (list or numpy.ndarray): Coefficients densities, either as list of arrays or single array. + normalize (bool): Whether to normalize fragment partitioning. Defaults to True. 
Returns: - A list of arrays or an array containing the contribution of each fragment. + list or numpy.ndarray: Contribution of each fragment. Returns list if input was list, array otherwise. """ if type(prop_atom_inp) is list: @@ -302,6 +318,15 @@ def fragment_partitioning(fragments, prop_atom_inp, normalize=True): def make_atom(q, basis): + """Create a single-atom molecule at the origin. + + Args: + q (str): Element symbol. + basis (str or dict): Basis set. + + Returns: + pyscf.gto.Mole: Single-atom pyscf Mole object. + """ mol = gto.Mole() mol.atom = q + " 0.0 0.0 0.0" mol.charge = 0 @@ -311,10 +336,18 @@ def make_atom(q, basis): return mol def singleatom_basis_enumerator(basis): - """Enumerates the different tensors of atomic orbitals within a 1-atom basis set - Each tensor is a $2l+2$-sized group of orbitals that share a radial function and $l$ value. - For each tensor, return the values of $l$, $n$ (an arbitrary radial-function counter that starts at 0), - as well as AO range + """Enumerates the different tensors of atomic orbitals within a 1-atom basis set. + + Each tensor is a 2l+1-sized group of orbitals that share a radial function and l value. + + Args: + basis (list): Basis set definition in pyscf format. + + Returns: + tuple: A tuple containing: + - l_per_bas (list): Angular momentum quantum number l for each basis function. + - n_per_bas (list): Radial function counter n (starting at 0) for each basis function. + - ao_starts (list): Starting index in AO array for each basis function. """ ao_starts = [] l_per_bas = [] diff --git a/qstack/equio.py b/qstack/equio.py index d66fe674..19fd8715 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -25,7 +25,16 @@ def _get_mrange(l): - # for l=1, the pyscf order is x,y,z (1,-1,0) + """Get the m quantum number range for a given angular momentum l. + + For l=1, returns pyscf order: x,y,z which is (1,-1,0). + + Args: + l (int): Angular momentum quantum number. 
+ + Returns: + tuple or range: Magnetic quantum numbers for the given l. + """ if l==1: return (1,-1,0) else: @@ -33,13 +42,14 @@ def _get_mrange(l): def _get_llist(q, mol): - """ + """Get list of angular momentum quantum numbers for basis functions of an element. + Args: - q (int): Atomic number. - mol (pyscf Mole): pyscf Mole object. + q (int or str): Atomic number or element symbol. + mol (pyscf.gto.Mole): pyscf Mole object. Returns: - A list + list: List of angular momentum quantum numbers for each basis function. """ # TODO other basis formats? @@ -59,21 +69,21 @@ def _get_tsize(tensor): """Computes the size of a tensor. Args: - tensor (metatensor TensorMap): Tensor. + tensor (metatensor.TensorMap): Tensor. Returns: - The size of the tensor as an integer. + int: Total size of the tensor (total number of elements). """ return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys]) def _labels_to_array(labels): - """Represents a set of metatensor labels as an array of the labels, using custom dtypes + """Represents a set of metatensor labels as an array of the labels, using custom dtypes. Args: - labels (metatensor Labels): Labels + labels (metatensor.Labels): Labels object. Returns: - labels (numpy ndarray[ndim=1, structured dtype]): the same labels + numpy.ndarray: 1D structured array containing the same labels. """ values = labels.values dtype = [ (name,values.dtype) for name in labels.names] @@ -83,11 +93,11 @@ def vector_to_tensormap(mol, c): """Transform a vector into a tensor map. Used by :py:func:`array_to_tensormap`. Args: - mol (pyscf Mole): pyscf Mole object. - v (numpy ndarray): Vector. + mol (pyscf.gto.Mole): pyscf Mole object. + c (numpy.ndarray): Vector to transform. Returns: - A metatensor tensor map. + metatensor.TensorMap: Tensor map representation of the vector. 
""" atom_charges = list(mol.atom_charges()) @@ -158,14 +168,17 @@ def vector_to_tensormap(mol, c): def tensormap_to_vector(mol, tensor): - """Transform a tensor map into a vector. :py:func:`Used by tensormap_to_array`. + """Transform a tensor map into a vector. Used by :py:func:`tensormap_to_array`. Args: - mol (pyscf Mole): pyscf Mole object. - tensor (metatensor TensorMap): Tensor. + mol (pyscf.gto.Mole): pyscf Mole object. + tensor (metatensor.TensorMap): Tensor to transform. Returns: - A numpy ndarray (vector). + numpy.ndarray: 1D array (vector) representation. + + Raises: + RuntimeError: If tensor size does not match mol.nao. """ nao = _get_tsize(tensor) @@ -191,17 +204,26 @@ def tensormap_to_vector(mol, tensor): def matrix_to_tensormap(mol, dm): - """ Transform a matrix into a tensor map. Used by :py:func:`array_to_tensormap`. + """Transform a matrix into a tensor map. Used by :py:func:`array_to_tensormap`. Args: - mol (pyscf Mole): pyscf Mole object. - v (numpy ndarray): Matrix. + mol (pyscf.gto.Mole): pyscf Mole object. + dm (numpy.ndarray): Matrix to transform. Returns: - A metatensor tensor map. + metatensor.TensorMap: Tensor map representation of the matrix. """ def pairs(list1, list2): + """Generate all pairs from two lists. + + Args: + list1 (list): First list. + list2 (list): Second list. + + Returns: + numpy.ndarray: Array of all (i,j) pairs. + """ return np.array([(i,j) for i in list1 for j in list2]) atom_charges = list(mol.atom_charges()) @@ -316,11 +338,14 @@ def tensormap_to_matrix(mol, tensor): """Transform a tensor map into a matrix. Used by :py:func:`tensormap_to_array`. Args: - mol (pyscf Mole): pyscf Mole object. - tensor (metatensor TensorMap): Tensor. + mol (pyscf.gto.Mole): pyscf Mole object. + tensor (metatensor.TensorMap): Tensor to transform. Returns: - A numpy ndarray (matrix). + numpy.ndarray: 2D array (matrix) representation. + + Raises: + RuntimeError: If tensor size does not match mol.nao * mol.nao. 
""" nao2 = _get_tsize(tensor) @@ -358,14 +383,17 @@ def tensormap_to_matrix(mol, tensor): return dm def array_to_tensormap(mol, v): - """ Transform an array into a tensor map. + """Transform an array into a tensor map. Args: - mol (pyscf Mole): pyscf Mole object. - v (numpy ndarray): Array. It can be a vector or a matrix. + mol (pyscf.gto.Mole): pyscf Mole object. + v (numpy.ndarray): Array to transform. Can be a vector (1D) or matrix (2D). Returns: - A metatensor tensor map. + metatensor.TensorMap: Tensor map representation of the array. + + Raises: + ValueError: If array dimension is not 1 or 2. """ if v.ndim==1: return vector_to_tensormap(mol, v) @@ -379,11 +407,14 @@ def tensormap_to_array(mol, tensor): """Transform a tensor map into an array. Args: - mol (pyscf Mole): pyscf Mole object. - tensor (metatensor TensorMap): Tensor. + mol (pyscf.gto.Mole): pyscf Mole object. + tensor (metatensor.TensorMap): Tensor to transform. Returns: - A numpy ndarray. Matrix or vector, depending on the key names of the tensor. + numpy.ndarray: Array representation (1D vector or 2D matrix). + + Raises: + RuntimeError: If tensor key names don't match expected format. """ if tensor.keys.names==vector_label_names.tm: @@ -395,13 +426,16 @@ def tensormap_to_array(mol, tensor): def join(tensors): - """Merge two or more tensors with the same label names avoiding information duplictaion. + """Merge two or more tensors with the same label names avoiding information duplication. Args: - tensors (list): List of metatensor TensorMap. + tensors (list): List of metatensor.TensorMap objects. Returns: - A metatensor TensorMap containing the information of all the input tensors. + metatensor.TensorMap: Merged tensor containing information from all input tensors. + + Raises: + RuntimeError: If tensors have different label names. 
""" if not all(tensor.keys.names==tensors[0].keys.names for tensor in tensors): @@ -446,10 +480,14 @@ def split(tensor): """Split a tensor based on the molecule information stored within the input TensorMap. Args: - tensor (metatensor TensorMap): Tensor containing several molecules. + tensor (metatensor.TensorMap): Tensor containing several molecules. Returns: - N metatensor TensorMap, where N is equal to the total number of diferent molecules stored within the input TensorMap. + list or dict: Collection of metatensor.TensorMap objects, one per molecule. + Returns list if molecule indices are continuous, dict otherwise. + + Raises: + RuntimeError: If tensor does not contain multiple molecules. """ if tensor.sample_names[0]!=_molid_name: diff --git a/qstack/fields/decomposition.py b/qstack/fields/decomposition.py index b9f2b085..100481de 100644 --- a/qstack/fields/decomposition.py +++ b/qstack/fields/decomposition.py @@ -62,10 +62,15 @@ def get_self_repulsion(mol, dm): return np.einsum('ij,ij', j, dm) def decomposition_error(self_repulsion, c, eri2c): - """Computes the decomposition error. + """Computes the decomposition error for density fitting. - .. todo:: - Write the complete docstring + Args: + self_repulsion (float): Self-repulsion energy from the original density matrix. + c (numpy ndarray): 1D array of density expansion coefficients. + eri2c (numpy ndarray): 2D array of 2-center electron repulsion integrals. + + Returns: + float: The decomposition error. """ return self_repulsion - c @ eri2c @ c @@ -100,12 +105,15 @@ def get_coeff(dm, eri2c, eri3c, slices=None): return c def _get_inv_metric(mol, metric, v): - """ + """Computes the inverse metric applied to a vector. Args: mol (pyscf Mole): pyscf Mole object. - metric (str): unit, overlap or coulomb. - v (numpy ndarray): Number of electrons decomposed into a vector. + metric (str or numpy ndarray): Metric type ('unit', 'overlap', 'coulomb') or a metric matrix. 
+ v (numpy ndarray): Vector to apply the inverse metric to. + + Returns: + numpy ndarray: Result of applying the inverse metric to the input vector. """ if isinstance(metric, str): metric = metric.lower() @@ -121,18 +129,19 @@ def _get_inv_metric(mol, metric, v): def correct_N_atomic(mol, N, c0, metric='u'): - """ + """Corrects decomposition coefficients to match the target electron count per atom. + + Uses Lagrange multipliers to enforce the correct number of electrons per atom + while minimizing changes to the decomposition coefficients. Args: - mol (pyscf Mole): pyscf Mole objec used for the computation of the density matrix. - N (int): Number of electrons. Defaults to None. - c0 (1D numpy array): Decomposition coefficients. - metric (str): .Defaults to 'u'. + mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. + N (numpy ndarray): Target number of electrons per atom. + c0 (numpy ndarray): 1D array of initial decomposition coefficients. + metric (str): Metric type for correction ('u' for unit, 's' for overlap, 'j' for coulomb). Defaults to 'u'. Returns: - - .. todo:: - Write the complete docstring. + numpy ndarray: Corrected decomposition coefficients (1D array). """ Q = number_of_electrons_deco_vec(mol, per_atom=True) @@ -144,20 +153,17 @@ def correct_N_atomic(mol, N, c0, metric='u'): def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): - """ + """Corrects decomposition coefficients to match the target total electron count. Args: - mol (pyscf Mole): pyscf Mole objec used for the computation of the density matrix. - c0 (1D numpy array): Decomposition coefficients. - N (int): Number of electrons. Defaults to None. - mode (str): Defaults to Lagrange. - metric (str): Defaults to u. + mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. + c0 (numpy ndarray): 1D array of initial decomposition coefficients. + N (int, optional): Target number of electrons. If None, uses mol.nelectron. 
Defaults to None. + mode (str): Correction method ('scale' or 'lagrange'). Defaults to 'Lagrange'. + metric (str): Metric type for Lagrange correction ('u', 's', or 'j'). Defaults to 'u'. Returns: - A numpy ndarray containing a set of expansion coefficients taking into account the correct total number of electrons. - - .. todo:: - Write the complete docstring. + numpy ndarray: Corrected decomposition coefficients (1D array). """ mode = mode.lower() @@ -178,10 +184,20 @@ def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): def number_of_electrons_deco_vec(mol, per_atom=False): - """ + """Computes the electron number decomposition vector for basis functions. - .. todo:: - Write the complete docstring. + For s-functions (l=0), computes the integral of the basis function which + corresponds to its contribution to the electron count. + + Args: + mol (pyscf Mole): pyscf Mole object. + per_atom (bool): If True, returns a 2D array with per-atom contributions. + If False, returns a 1D array. Defaults to False. + + Returns: + numpy ndarray: If per_atom is False, 1D array of shape (nao,) with electron + contributions for each basis function. If per_atom is True, + 2D array of shape (nao, natm) with per-atom contributions. """ if per_atom: Q = np.zeros((mol.nao,mol.natm)) diff --git a/qstack/fields/density2file.py b/qstack/fields/density2file.py index 585114c0..8ad800c8 100644 --- a/qstack/fields/density2file.py +++ b/qstack/fields/density2file.py @@ -5,15 +5,23 @@ from .decomposition import number_of_electrons_deco def coeffs_to_cube(mol, coeffs, cubename, nx = 80, ny = 80, nz = 80, resolution = 0.1, margin = 3.0): - """Saves the density in a cube file. + """Saves the electron density to a cube file format. + + Evaluates the density from expansion coefficients on a 3D grid and writes + it to a Gaussian cube file for visualization. Args: - mol (pyscf Mole): pyscf Mole. - coeffs (numpy ndarray): Expansion coefficients. - cubename (str): Name of the cubo file. 
+ mol (pyscf Mole): pyscf Mole object. + coeffs (numpy ndarray): 1D array of density expansion coefficients. + cubename (str): Output filename (without .cube extension). + nx (int): Number of grid points in x direction. Defaults to 80. + ny (int): Number of grid points in y direction. Defaults to 80. + nz (int): Number of grid points in z direction. Defaults to 80. + resolution (float): Grid spacing in Bohr. Defaults to 0.1. + margin (float): Extra space around molecule in Bohr. Defaults to 3.0. Returns: - A new or overwrited file named .cube + None: Creates a file named .cube on disk. """ # Make grid @@ -31,15 +39,18 @@ def coeffs_to_cube(mol, coeffs, cubename, nx = 80, ny = 80, nz = 80, resolution def coeffs_to_molden(mol, coeffs, moldenname): - """Saves the density in a molden file. + """Saves the electron density to a MOLDEN file format. + + Writes the density represented by expansion coefficients to a MOLDEN file + which can be visualized with various quantum chemistry visualization tools. Args: - mol (pyscf Mole): pyscf Mole. - coeffs (numpy ndarray): Expansion coefficients. - moldenname (str): File name of the molden file. + mol (pyscf Mole): pyscf Mole object. + coeffs (numpy ndarray): 1D array of density expansion coefficients. + moldenname (str): Output filename for the MOLDEN file. Returns: - A new or overwrited file named .molden + None: Creates a file named .molden on disk. """ with open(moldenname, 'w') as f: diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index 11d27564..4d59494a 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -88,10 +88,17 @@ def sphericalize_density_matrix(mol, dm): return spherical_dm def get_converged_mf(mol, func, dm0=None): - """ + """Performs SCF calculation and returns both the mean-field object and density matrix. - .. todo:: - Write the complete docstring, and merge with get_converged_dm() + Args: + mol (pyscf Mole): pyscf Mole object. + func (str): Exchange-correlation functional. 
+ dm0 (numpy ndarray, optional): Initial guess for density matrix. Defaults to None. + + Returns: + tuple: A tuple containing: + - mf (pyscf.dft.rks.RKS or pyscf.dft.uks.UKS): Converged mean-field object. + - dm (numpy ndarray): Converged density matrix in AO-basis. """ if mol.multiplicity == 1: diff --git a/qstack/fields/dori.py b/qstack/fields/dori.py index 50c82d5e..ca679321 100644 --- a/qstack/fields/dori.py +++ b/qstack/fields/dori.py @@ -6,26 +6,25 @@ def eval_rho_dm(mol, ao, dm, deriv=2): - r'''Calculate the electron density and the density derivatives. + r'''Calculate electron density and its derivatives from a density matrix. - Taken from pyscf/dft/numint.py and modified to return second derivative matrices. + Modified from pyscf/dft/numint.py to return full second derivative matrices + needed for DORI calculations. Args: - mol : an instance of :class:`pyscf.gto.Mole` - ao : 3D array of shape (*,ngrids,nao): - ao[0] : atomic oribitals values on the grid - ao[1:4] : atomic oribitals derivatives values (if deriv>=1) - ao[4:10] : atomic oribitals second derivatives values (if deriv==2) - dm : 2D array of (nao,nao) - Density matrix (assumed Hermitian) - Kwargs: - deriv : int - Compute with up to `deriv`-order derivatives + mol (pyscf.gto.Mole): pyscf Mole object. + ao (numpy ndarray): 3D array of shape (nderiv, ngrids, nao) where: + - ao[0]: atomic orbital values on the grid + - ao[1:4]: first derivatives (if deriv>=1) + - ao[4:10]: second derivatives in triangular form (if deriv==2) + dm (numpy ndarray): 2D array (nao, nao) - Hermitian density matrix in AO basis. + deriv (int): Maximum derivative order to compute (0, 1, or 2). Defaults to 2. 
Returns: - 1D array of size ngrids to store electron density - 2D array of (3,ngrids) to store density derivatives (if deriv>=1) - 3D array of (3,3,ngrids) to store 2nd derivatives (if deriv==2) + tuple: Depending on deriv value: + - deriv=0: rho (1D array of size ngrids) + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) ''' AO, dAO_dr, d2AO_dr2 = np.split(ao, [1,4]) @@ -53,24 +52,24 @@ def eval_rho_dm(mol, ao, dm, deriv=2): def eval_rho_df(ao, c, deriv=2): - r'''Calculate the electron density and the density derivatives - for a fitted density. + r'''Calculate electron density and its derivatives from density-fitting coefficients. + + Computes density and derivatives directly from fitted/decomposed density + representation using expansion coefficients. Args: - ao : 3D array of shape (*,ngrids,nao): - ao[0] : atomic oribitals values on the grid - ao[1:4] : atomic oribitals derivatives values (if deriv>=1) - ao[4:10] : atomic oribitals second derivatives values (if deriv==2) - c : 1D array of (nao,) - density fitting coefficients - Kwargs: - deriv : int - Compute with up to `deriv`-order derivatives + ao (numpy ndarray): 3D array of shape (nderiv, ngrids, nao) where: + - ao[0]: atomic orbital values on the grid + - ao[1:4]: first derivatives (if deriv>=1) + - ao[4:10]: second derivatives in triangular form (if deriv==2) + c (numpy ndarray): 1D array of density fitting/expansion coefficients. + deriv (int): Maximum derivative order to compute (0, 1, or 2). Defaults to 2. 
Returns: - 1D array of size ngrids to store electron density - 2D array of (3,ngrids) to store density derivatives (if deriv>=1) - 3D array of (3,3,ngrids) to store 2nd derivatives (if deriv==2) + tuple: Depending on deriv value: + - deriv=0: rho (1D array of size ngrids) + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) ''' maxdim = 1 if deriv==0 else (4 if deriv==1 else 10) @@ -87,26 +86,27 @@ def eval_rho_df(ao, c, deriv=2): def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): - r'''Wrapper to calculate the electron density and the density derivatives. + r'''Wrapper to calculate electron density and derivatives efficiently. + + Computes density and its spatial derivatives on a grid from either a density + matrix or fitting coefficients, with optimizations for numerical stability. Args: - mol : an instance of :class:`pyscf.gto.Mole` - coords : 2D array of (ngrids,3) - Grid coordinates (in Bohr) - Kwargs: - dm : 2D array of (nao,nao) - Density matrix (assumed Hermitian) (confilicts with c) - c : 1D array of (nao) - density fitting coefficients (confilicts with dm) - deriv : int - Compute with up to `deriv`-order derivatives - eps : float - Min. density to compute the derivatives for + mol (pyscf.gto.Mole): pyscf Mole object. + coords (numpy ndarray): 2D array (ngrids, 3) of grid coordinates in Bohr. + dm (numpy ndarray, optional): 2D density matrix in AO basis. Conflicts with c. + c (numpy ndarray, optional): 1D density fitting coefficients. Conflicts with dm. + deriv (int): Maximum derivative order (0, 1, or 2). Defaults to 2. + eps (float): Minimum density threshold below which derivatives are set to zero. Defaults to 1e-4. 
Returns: - 1D array of size ngrids to store electron density - 2D array of (3,ngrids) to store density derivatives (if deriv>=1) - 3D array of (3,3,ngrids) to store 2nd derivatives (if deriv==2) + tuple: Depending on deriv value: + - deriv=0: rho (1D array) + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) + + Raises: + RuntimeError: If both or neither of dm and c are provided. ''' if (c is None)==(dm is None): raise RuntimeError('Use either density matrix (dm) or density fitting coefficients (c)') @@ -135,19 +135,19 @@ def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): def compute_s2rho(rho, d2rho_dr2, eps=1e-4): - """Compute the sign of 2nd eigenvalue of density Hessian × density + """Computes signed density based on second eigenvalue of the density Hessian. + + Useful for distinguishing bonding vs. non-bonding regions. The sign of the + second eigenvalue of the Hessian indicates local density topology. Args: - rho : 1D array of (ngrids) - Electron density - d2rho_dr2 : 3D array of (3,3,ngrids) - Density 2nd derivatives - Kwargs: - eps : float - density threshold + rho (numpy ndarray): 1D array (ngrids,) of electron density values. + d2rho_dr2 (numpy ndarray): 3D array (3, 3, ngrids) of density second derivatives (Hessian). + eps (float): Density threshold below which values are set to zero. Defaults to 1e-4. + Returns: - 1D array of (ngrids) --- electron density * sgn(second eigenvalue of d^2rho/dr^2) - if density>=eps else 0 + numpy ndarray: 1D array (ngrids,) containing rho * sign(λ₂) where λ₂ is the + second eigenvalue of the Hessian, or 0 where rho < eps. """ s2rho = np.zeros_like(rho) idx = np.where(rho>=eps) @@ -156,38 +156,29 @@ def compute_s2rho(rho, d2rho_dr2, eps=1e-4): def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): - r""" Inner function to compute DORI analytically + r"""Computes Density Overlap Regions Indicator (DORI) analytically. 
+ + DORI is a density-based descriptor for identifying covalent bonding regions, + with values close to 1 indicating strong electron sharing (covalent bonds). Args: - rho : 1D array of (ngrids) - Electron density - drho_dr : 2D array of (3,ngrids) - Density derivatives - d2rho_dr2 : 3D array of (3,3,ngrids) - Density 2nd derivatives - Kwargs: - eps : float - Density threshold (if |rho|=eps)[0] @@ -207,26 +198,23 @@ def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): def compute_dori_num(mol, coords, dm=None, c=None, eps=1e-4, dx=1e-4): - r""" Inner function to compute DORI seminumerically - See documentation to compute_dori(). + r"""Computes DORI using numerical differentiation (semi-numerical approach). + + Alternative to analytical DORI calculation using finite differences for + derivatives of k². Useful for validation or when analytical gradients + are problematic. Args: - mol : an instance of :class:`pyscf.gto.Mole` - coords : 2D array of (ngrids,3) - Grid coordinates (in Bohr) - Kwargs: - dm : 2D array of (nao,nao) - Density matrix (assumed Hermitian) (confilicts with c) - c : 1D array of (nao) - density fitting coefficients (confilicts with dm) - eps : float - Density threshold (if |rho|=eps else 0 (only with alg='analytical'). + tuple: (dori, rho, s2rho) where: + - dori: 1D array (ngrids,) of DORI values + - rho: 1D array (ngrids,) of electron density + - s2rho: 1D array (ngrids,) of signed density (None if numerical) """ max_size = int(mem * 2**30) # mem * 1 GiB @@ -325,51 +306,43 @@ def dori(mol, dm=None, c=None, nx=80, ny=80, nz=80, resolution=RESOLUTION, margin=BOX_MARGIN, cubename=None, dx=1e-4, mem=1, progress=False): - """Compute DORI + """High-level interface to compute DORI with automatic grid generation and file output. + + Computes the Density Overlap Regions Indicator (DORI) for analyzing chemical + bonding. Automatically generates appropriate grids and optionally saves results + to cube files for visualization. 
Args: - mol : an instance of :class:`pyscf.gto.Mole` - Kwargs: - dm : 2D array of (nao,nao) - Density matrix (confilicts with c) - c : 1D array of (nao) - Density fitting coefficients (confilicts with dm) - eps : float - density threshold for DORI - alg : str - [a]nalytical or [n]umerical computation - grid_type : str - Type of grid, 'dft' for a DFT grid and 'cube' for a cubic grid. - grid_level : int - For a DFT grid, the grid level. - nx, ny, nz : int - For a cubic grid, - the number of grid point divisions in x, y, z directions. - Conflicts to keyword resolution. - resolution: float - For a cubic grid, - the resolution of the mesh grid in the cube box. - Conflicts to keywords nx, ny, nz. - cubename : str - For a cubic grid, - name for the cube files to save the results to. - mem : float - max. memory (GiB) that can be allocated to compute - the AO and their derivatives - dx : float - Step (in Bohr) to take the numerical derivatives - progress : bool - if print a progress bar + mol (pyscf.gto.Mole): pyscf Mole object. + dm (numpy ndarray, optional): 2D density matrix in AO basis. Conflicts with c. + c (numpy ndarray, optional): 1D density fitting coefficients. Conflicts with dm. + eps (float): Density threshold for DORI. Defaults to 1e-4. + alg (str): Algorithm: 'analytical' or 'numerical'. Defaults to 'analytical'. + grid_type (str): Grid type: 'dft' for DFT quadrature grid or 'cube' for uniform grid. Defaults to 'dft'. + grid_level (int): For DFT grid, the grid level (higher = more points). Defaults to 1. + nx (int): For cube grid, number of points in x direction. Defaults to 80. + ny (int): For cube grid, number of points in y direction. Defaults to 80. + nz (int): For cube grid, number of points in z direction. Defaults to 80. + resolution (float): For cube grid, grid spacing in Bohr. Conflicts with nx/ny/nz. + margin (float): For cube grid, extra space around molecule in Bohr. Defaults to BOX_MARGIN. 
+ cubename (str, optional): For cube grid, base filename for output cube files. If None, no files saved. + dx (float): For numerical algorithm, finite difference step in Bohr. Defaults to 1e-4. + mem (float): Maximum memory in GiB for AO evaluation. Defaults to 1. + progress (bool): If True, displays progress bar. Defaults to False. Returns: - Tuple of: - 1D array of (ngrids) --- computed DORI - 1D array of (ngrids) --- electron density - 1D array of (ngrids) --- electron density * sgn(second eigenvalue of d^2rho/dr^2) - if density>=eps else 0 (only with alg='analytical'). - 2D array of (ngrids,3) --- grid coordinates - 1D array of (ngrids) --- grid weights - + tuple: (dori, rho, s2rho, coords, weights) containing: + - dori (numpy ndarray): 1D array of DORI values + - rho (numpy ndarray): 1D array of electron density + - s2rho (numpy ndarray): 1D array of signed density (None if numerical) + - coords (numpy ndarray): 2D array (ngrids, 3) of grid coordinates + - weights (numpy ndarray): 1D array of grid weights + + Note: + When cubename is provided with cube grid, creates three files: + - .dori.cube: DORI values + - .rho.cube: electron density + - .sgnL2rho.cube: signed density (analytical only) """ if grid_type=='dft': diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index 743c51fe..98e99068 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -2,10 +2,14 @@ from . import moments def get_cis(mf, nstates): - """ + """Performs CIS (Configuration Interaction Singles) calculation using TDA (Tamm-Dancoff Approximation). + + Args: + mf: Mean-field object (RHF, UHF, RKS, or UKS). + nstates (int): Number of excited states to compute. - .. todo:: - Write the complete docstring. + Returns: + TDA object: Converged TDA calculation object with excited state information. 
""" td = mf.TDA() td.nstates = nstates @@ -15,10 +19,13 @@ def get_cis(mf, nstates): return td def get_cis_tdm(td): - """ + """Extracts transition density matrices from TDA/CIS calculation. + + Args: + td: TDA or CIS calculation object containing excitation amplitudes. - .. todo:: - Write the complete docstring. + Returns: + numpy ndarray: Array of transition density matrices for all computed states. """ return np.sqrt(2.0) * np.array([ xy[0] for xy in td.xy ]) diff --git a/qstack/fields/hf_otpd.py b/qstack/fields/hf_otpd.py index fcaaa95d..58dfb541 100644 --- a/qstack/fields/hf_otpd.py +++ b/qstack/fields/hf_otpd.py @@ -3,17 +3,21 @@ from .dm import make_grid_for_rho def hf_otpd(mol, dm, grid_level = 3, save_otpd = False, return_all = False): - """Computes the uncorrelated on-top pair density on a grid. + """Computes the Hartree-Fock uncorrelated on-top pair density (OTPD) on a grid. + + The on-top pair density is the probability density of finding two electrons + at the same position. For Hartree-Fock, this is computed as (rho/2)^2. Args: mol (pyscf Mole): pyscf Mole object. - dm (numpy ndarray): Density matrix in AO-basis. - grid_level (int): Controls the number of radial and angular points. - save_otpd (bool): If True, saves the input and output in a .npz file. Defaults to False - return_all (bool): If true, returns the uncorrelated on-top pair density on a grid, and the cooresponding pyscf Grid object; if False, returns only the uncorrelated on-top pair density. Defaults to False + dm (numpy ndarray): 2D density matrix in AO-basis. + grid_level (int): DFT grid level controlling number of radial and angular points. Defaults to 3. + save_otpd (bool): If True, saves results to a .npz file. Defaults to False. + return_all (bool): If True, returns both OTPD and grid object; if False, returns only OTPD. Defaults to False. Returns: - A numpy ndarray with the uncorrelated on-top pair density on a grid. 
If 'return_all' = True, then it also returns the corresponding pyscf Grid object. + numpy ndarray or tuple: If return_all is False, returns 1D array of OTPD values. + If return_all is True, returns tuple of (otpd, grid) where grid is the pyscf Grid object. """ grid = make_grid_for_rho(mol, grid_level) @@ -32,12 +36,18 @@ def hf_otpd(mol, dm, grid_level = 3, save_otpd = False, return_all = False): return hf_otpd def save_OTPD(mol, otpd, grid): - """ Saves the information about an OTPD computation into a .npz file. + """Saves on-top pair density computation results to a NumPy compressed file. + + Creates a .npz file containing the molecular structure, OTPD values, + grid coordinates, and integration weights for later analysis. Args: mol (pyscf Mole): pyscf Mole object. - otpd (numpy ndarray): On-top pair density on a grid. - grid (pyscf Grid): Grid object + otpd (numpy ndarray): 1D array of on-top pair density values on the grid. + grid (pyscf Grid): Grid object containing coordinates and weights. + + Returns: + None: Creates a file named _otpd_data.npz on disk. """ output = ''.join(mol.elements)+"_otpd_data" diff --git a/qstack/fields/hirshfeld.py b/qstack/fields/hirshfeld.py index 5819cdd8..cde98ca0 100644 --- a/qstack/fields/hirshfeld.py +++ b/qstack/fields/hirshfeld.py @@ -4,14 +4,17 @@ def spherical_atoms(elements, atm_bas): - """Get density matrices for spherical atoms. + """Computes density matrices for spherically averaged isolated atoms. + + For each element, creates an isolated atom calculation with appropriate spin + and computes its density matrix using atomic Hartree-Fock initial guess. Args: - elements (list of str): Elements to compute the DM for. - atm_bas (string / pyscf basis dictionary): Basis to use. + elements (list of str or set): Element symbols to compute density matrices for. + atm_bas (str or dict): Basis set name (e.g., 'def2-svp') or pyscf basis dictionary. 
Returns: - A dict of numpy 2d ndarrays which contains the atomic density matrices for each element with its name as a key. + dict: Dictionary mapping element symbols (str) to atomic density matrices (numpy 2D ndarrays). """ dm_atoms = {} @@ -21,17 +24,21 @@ def spherical_atoms(elements, atm_bas): return dm_atoms def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): - """ Computes the Hirshfeld weights. + """Computes Hirshfeld partitioning weights for each atom at grid points. + + Hirshfeld partitioning divides the molecular density among atoms based on + their promolecular (free atom) densities. Dominant partitioning assigns + each grid point exclusively to the atom with the highest weight. Args: - mol (pyscf Mole): pyscf Mole object. - grid_coord (numpy ndarray): Coordinates of the grid. - dm_atoms (dict of numpy 2d ndarrays): Atomic density matrices (output of the `spherical_atoms` fn). - atm_bas (string / pyscf basis dictionary): Basis set used to compute dm_atoms. - dominant (bool): Whether to use dominant or classical partitioning. + mol_full (pyscf Mole): Complete molecular pyscf Mole object. + grid_coord (numpy ndarray): 2D array (ngrids, 3) of grid point coordinates in Bohr. + atm_dm (dict): Dictionary mapping element symbols to atomic density matrices from `spherical_atoms`. + atm_bas (str or dict): Basis set name or dictionary used for atomic density matrices. + dominant (bool): If True, uses dominant (all-or-nothing) partitioning; if False, uses standard Hirshfeld weights. Returns: - A numpy ndarray containing the computed Hirshfeld weights. + numpy ndarray: 2D array (natm, ngrids) of partitioning weights for each atom at each grid point. """ # promolecular density @@ -62,21 +69,32 @@ def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): def hirshfeld_charges(mol, cd, dm_atoms=None, atm_bas=None, dominant=True, occupations=False, grid_level=3): - """Fit molecular density onto an atom-centered basis. 
+ """Computes atomic charges or occupations using Hirshfeld partitioning. + + Partitions the molecular electron density among atoms using Hirshfeld weights + based on free atom densities. Can work with either density-fitting coefficients + or full density matrices, and supports both standard and dominant partitioning. Args: - mol (pyscf Mole): pyscf Mole object. - cd (1D or 2D numpy ndarray or list of arrays): Density-fitting coefficients / density matrices. - dm_atoms (dict of numpy 2d ndarrays): Atomic density matrices (output of the `spherical_atoms` fn). - If None, is computed on-the-fly. - atm_bas (string / pyscf basis dictionary): Basis set used to compute dm_atoms. - If None, is taken from mol. - dominant (bool): Whether to use dominant or classical partitioning. - occupations (bool): Whether to return atomic occupations or charges. - grid level (int): Grid level for numerical integration. + mol (pyscf Mole): pyscf Mole object for the molecule. + cd (numpy ndarray or list): Density representation as: + - 1D array: density-fitting coefficients + - 2D array: density matrix in AO basis + - list: multiple densities (returns list of results) + dm_atoms (dict, optional): Pre-computed atomic density matrices from `spherical_atoms`. + If None, computed automatically. Defaults to None. + atm_bas (str or dict, optional): Basis set for atomic density matrices. + If None, uses mol.basis. Defaults to None. + dominant (bool): If True, uses dominant (all-or-nothing) partitioning; + if False, uses standard Hirshfeld weights. Defaults to True. + occupations (bool): If True, returns atomic electron populations; + if False, returns atomic charges (Z - N). Defaults to False. + grid_level (int): DFT grid level for numerical integration. Defaults to 3. Returns: - A numpy 1d ndarray or list of them containing the computed atomic charges or occupations. + numpy ndarray or list: Atomic charges or occupations. 
+ - Single 1D array if cd is a single density + - List of 1D arrays if cd is a list of densities """ def atom_contributions(cd, ao, tot_weights): diff --git a/qstack/fields/moments.py b/qstack/fields/moments.py index 9d793b6d..c8d18d1e 100644 --- a/qstack/fields/moments.py +++ b/qstack/fields/moments.py @@ -19,10 +19,14 @@ def first(mol, rho): def r_dm(mol, dm): - """ + """Computes the electric dipole moment from a density matrix. + + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): 2D density matrix in AO basis. - .. todo:: - write docstring. + Returns: + numpy ndarray: Electric dipole moment vector (3 components). """ with mol.with_common_orig((0,0,0)): ao_dip = mol.intor_symmetric('int1e_r', comp=3) @@ -30,9 +34,17 @@ def r_dm(mol, dm): return el_dip def r_c(mol, rho): - """ - .. todo:: - Write docstring, and include uncontracted basis in code and verify formulas + """Computes the electric dipole moment from fitting coefficients. + + Args: + mol (pyscf Mole): pyscf Mole object. + rho (numpy ndarray): 1D array of density-fitting coefficients. + + Returns: + numpy ndarray: Electric dipole moment vector (3 components). + + Note: + Currently only supports contracted basis sets. """ r = np.zeros(3) i=0 @@ -69,13 +81,17 @@ def r2_c(rho, mol): = \\int \\hat{r}^{2} \\rho d r Args: - mol (scipy Mole): scipy Mole object. + rho (numpy ndarray): 1D array of density-fitting coefficients. + mol (pyscf Mole): pyscf Mole object. Returns: - The zeroth, first, and second moments of electron density distribution. + tuple: Three values (N, r, r2) representing: + - N (float): Zeroth moment (integrated density). + - r (numpy ndarray): First moment (3-component dipole vector). + - r2 (float): Second moment (mean square radius). - .. todo:: - Include uncontracted basis in code and verify formulas + Note: + Currently only supports contracted basis sets. 
""" N = 0.0 # <1> zeroth diff --git a/qstack/mathutils/fps.py b/qstack/mathutils/fps.py index ec335be0..2da8d6c5 100644 --- a/qstack/mathutils/fps.py +++ b/qstack/mathutils/fps.py @@ -1,7 +1,19 @@ import numpy as np def do_fps(x, d=0): - # Code from Giulio Imbalzano + """Perform Farthest Point Sampling on a set of points. + + Code from Giulio Imbalzano. + + Args: + x (numpy.ndarray): 2D array of points, shape (n_points, n_features). + d (int): Number of points to sample. If 0, samples all points. Defaults to 0. + + Returns: + tuple: A tuple containing: + - iy (numpy.ndarray): Indices of sampled points. + - measure (numpy.ndarray): Distances to nearest selected point for each iteration. + """ n = len(x) if d==0: d = n diff --git a/qstack/mathutils/matrix.py b/qstack/mathutils/matrix.py index d8bca2ed..e3db14ca 100644 --- a/qstack/mathutils/matrix.py +++ b/qstack/mathutils/matrix.py @@ -4,10 +4,10 @@ def from_tril(mat_tril): """Restore a symmetric matrix from its lower-triangular form. Args: - mat_tril (numpy 1darray): matrix in a lower-triangular form. + mat_tril (numpy.ndarray): 1D array containing matrix in lower-triangular form. Returns: - A numpy 2darray containing the matrix. + numpy.ndarray: 2D symmetric matrix. """ n = int((np.sqrt(1+8*len(mat_tril))-1)/2) ind = np.tril_indices(n) @@ -17,6 +17,15 @@ def from_tril(mat_tril): return mat def sqrtm(m, eps=1e-13): + """Compute the matrix square root of a symmetric matrix. + + Args: + m (numpy.ndarray): Symmetric matrix. + eps (float): Threshold for eigenvalues to be considered zero. Defaults to 1e-13. + + Returns: + numpy.ndarray: Symmetrized square root of the matrix. 
+ """ e, b = np.linalg.eigh(m) e[abs(e) < eps] = 0.0 sm = b @ np.diag(np.sqrt(e)) @ b.T diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index ef7da4e1..5a269da8 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -15,81 +15,130 @@ zx,zy,zz = symbols('zx zy zz') def real_Y_correct_phase(l, m, theta, phi): - # returns real spherical harmonic in Condon--Shortley phase convention - # (sympy's Znm uses some other convention) - ym1 = Ynm (l, -abs(m), theta, phi) - ym2 = Ynm_c(l, -abs(m), theta, phi) - if m==0: - return ym1 - elif m<0: - return sp.I / sp.sqrt(2) * (ym1 - ym2) - elif m>0: - return 1 / sp.sqrt(2) * (ym1 + ym2) + """Returns real spherical harmonic in Condon-Shortley phase convention. + + Note: sympy's Ynm uses a different convention. + + Args: + l (int): Orbital angular momentum quantum number. + m (int): Magnetic quantum number. + theta (sympy.Symbol): Polar angle. + phi (sympy.Symbol): Azimuthal angle. + + Returns: + sympy.Expr: Real spherical harmonic expression. + """ + ym1 = Ynm (l, -abs(m), theta, phi) + ym2 = Ynm_c(l, -abs(m), theta, phi) + if m==0: + return ym1 + elif m<0: + return sp.I / sp.sqrt(2) * (ym1 - ym2) + elif m>0: + return 1 / sp.sqrt(2) * (ym1 + ym2) def get_polynom_Y(l, m): - # rewrites a real spherical harmonic as a polynom of x,y,z - theta = Symbol("theta", real=True) - phi = Symbol("phi", real=True) - r = Symbol('r', nonnegative=True) - expr = real_Y_correct_phase(l,m, theta, phi) * r**l - expr = expand(expr, func=True) - expr = expr.rewrite(sp.cos)#.simplify().trigsimp() - expr = expand_trig(expr) - expr = cancel(expr) - expr = expr.subs({r: sp.sqrt(x*x+y*y+z*z), phi: sp.atan2(y,x), theta: sp.atan2(sp.sqrt(x*x+y*y),z)}) - if m!=0: - expr = cancel(expr).simplify() - expr = expr.subs({x*x+y*y: 1-z*z, - 3*x*x+3*y*y : 3-3*z*z }) - return expr + """Rewrites a real spherical harmonic as a polynomial of x, y, z. + + Args: + l (int): Orbital angular momentum quantum number. 
+ m (int): Magnetic quantum number. + + Returns: + sympy.Expr: Polynomial expression in Cartesian coordinates. + """ + theta = Symbol("theta", real=True) + phi = Symbol("phi", real=True) + r = Symbol('r', nonnegative=True) + expr = real_Y_correct_phase(l,m, theta, phi) * r**l + expr = expand(expr, func=True) + expr = expr.rewrite(sp.cos)#.simplify().trigsimp() + expr = expand_trig(expr) + expr = cancel(expr) + expr = expr.subs({r: sp.sqrt(x*x+y*y+z*z), phi: sp.atan2(y,x), theta: sp.atan2(sp.sqrt(x*x+y*y),z)}) + if m!=0: + expr = cancel(expr).simplify() + expr = expr.subs({x*x+y*y: 1-z*z, + 3*x*x+3*y*y : 3-3*z*z }) + return expr def xyzint_wrapper(knm, integrals_xyz_dict): - k,n,m = knm - if k%2 or n%2 or m%2: - return 0 - else: - knm = tuple(sorted([k//2, n//2, m//2], reverse=True)) - if knm not in integrals_xyz_dict: - integrals_xyz_dict[knm] = xyzint(*knm) - return integrals_xyz_dict[knm] + """Wrapper for xyz integrals with caching. + + Args: + knm (tuple): Tuple of three integers (k, n, m) representing powers. + integrals_xyz_dict (dict): Cache dictionary for computed integrals. + + Returns: + float or sympy.Expr: Integral value, or 0 if any power is odd. + """ + k,n,m = knm + if k%2 or n%2 or m%2: + return 0 + else: + knm = tuple(sorted([k//2, n//2, m//2], reverse=True)) + if knm not in integrals_xyz_dict: + integrals_xyz_dict[knm] = xyzint(*knm) + return integrals_xyz_dict[knm] def product_Y(Y1,Y2): - # computes the product of two spherical harmonics - # and returns coefficients and a list of powers - prod = Y1 * Y2 - prod = prod.expand().cancel() - prod = poly(prod, gens=[x,y,z]) - return Matrix(prod.coeffs()), prod.monoms() + """Computes the product of two spherical harmonics. + + Args: + Y1 (sympy.Expr): First spherical harmonic polynomial. + Y2 (sympy.Expr): Second spherical harmonic polynomial. + + Returns: + tuple: A tuple containing: + - coefficients (sympy.Matrix): Coefficients of the product. + - monomials (list): List of monomial powers. 
+ """ + prod = Y1 * Y2 + prod = prod.expand().cancel() + prod = poly(prod, gens=[x,y,z]) + return Matrix(prod.coeffs()), prod.monoms() def print_wigner(D): - for l,d in enumerate(D): - for m1 in range(-l,l+1): - for m2 in range(-l,l+1): - print(f'D[{l}][{m1: d},{m2: d}] = {d[m1,m2]}') - print() + """Print Wigner D matrices in formatted output. -def compute_wigner(lmax): + Args: + D (list): List of Wigner D matrices for each l value. + """ + for l,d in enumerate(D): + for m1 in range(-l,l+1): + for m2 in range(-l,l+1): + print(f'D[{l}][{m1: d},{m2: d}] = {d[m1,m2]}') + print() - Y = [ [0]*(2*l+1) for l in range(lmax+1)] - Y_rot = [ [0]*(2*l+1) for l in range(lmax+1)] - for l in range(lmax+1): - for m in range(-l,l+1): - # spherical harmonic - Y[l][m] = get_polynom_Y(l, m) - # rotated spherical harmonic - Y_rot[l][m] = Y[l][m].subs({x: x1, y:y1, z:z1}).subs({x1:xx*x+xy*y+xz*z, y1:yx*x+yy*y+yz*z, z1:zx*x+zy*y+zz*z}) - - - D = [zeros(2*l+1,2*l+1) for l in range(lmax+1)] - integrals_xyz_dict = {} - for l in range(lmax+1): - for m1 in range(-l,l+1): - for m2 in range(-l,l+1): - coefs, pows = product_Y(Y[l][m2], Y_rot[l][m1]) - mono_integrals = [xyzint_wrapper(p,integrals_xyz_dict) for p in pows] - D[l][m1,m2] = coefs.dot(mono_integrals).factor() .subs({zx**2+zy**2: 1-zz**2, xx**2+xy**2:1-xz**2, yx**2+yy**2:1-yz**2}).simplify() - return D +def compute_wigner(lmax): + """Compute Wigner D matrices up to a maximum angular momentum. + + Args: + lmax (int): Maximum angular momentum quantum number. + + Returns: + list: List of Wigner D matrices (sympy.Matrix) for each l from 0 to lmax. 
+ """ + Y = [ [0]*(2*l+1) for l in range(lmax+1)] + Y_rot = [ [0]*(2*l+1) for l in range(lmax+1)] + for l in range(lmax+1): + for m in range(-l,l+1): + # spherical harmonic + Y[l][m] = get_polynom_Y(l, m) + # rotated spherical harmonic + Y_rot[l][m] = Y[l][m].subs({x: x1, y:y1, z:z1}).subs({x1:xx*x+xy*y+xz*z, y1:yx*x+yy*y+yz*z, z1:zx*x+zy*y+zz*z}) + + + D = [zeros(2*l+1,2*l+1) for l in range(lmax+1)] + integrals_xyz_dict = {} + for l in range(lmax+1): + for m1 in range(-l,l+1): + for m2 in range(-l,l+1): + coefs, pows = product_Y(Y[l][m2], Y_rot[l][m1]) + mono_integrals = [xyzint_wrapper(p,integrals_xyz_dict) for p in pows] + D[l][m1,m2] = coefs.dot(mono_integrals).factor() .subs({zx**2+zy**2: 1-zz**2, xx**2+xy**2:1-xz**2, yx**2+yy**2:1-yz**2}).simplify() + return D if __name__ == "__main__": diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 27814ee5..ded39064 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -7,19 +7,18 @@ def read_input(fname, basis, ecp=None): - """Read the structure from an Orca input (XYZ coordinates in simple format only) + """Read the structure from an Orca input (XYZ coordinates in simple format only). - Note: we do not read basis set info from the file. - TODO: read also %coords block? + Note: We do not read basis set info from the file. + TODO: Read also %coords block? Args: - fname (str) : path to file - basis (str/dict) : basis name, path to file, or dict in the pyscf format - Kwargs: - ecp (str) : ECP to use + fname (str): Path to Orca input file. + basis (str or dict): Basis name, path to file, or dict in the pyscf format. + ecp (str): Effective core potential to use. Defaults to None. Returns: - pyscf Mole object. + pyscf.gto.Mole: pyscf Mole object. """ with open(fname) as f: @@ -52,17 +51,19 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re Tested on Orca versions 4.0, 4.2, and 5.0. Args: - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. 
basename (str): Job name (without extension). - Kwargs: - directory (str) : path to the directory with the density files. - version (int): ORCA version (400 for 4.0, 421 for 4.2, 500 for 5.0). - openshell (bool): If read spin density in addition to the electron density. - reorder_dest (str): Which AO ordering convention to use. + directory (str): Path to the directory with the density files. Defaults to './'. + version (int): ORCA version (400 for 4.0, 421 for 4.2, 500 for 5.0). Defaults to 500. + openshell (bool): Whether to read spin density in addition to electron density. Defaults to False. + reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. Returns: - A numpy 2darray containing the density matrix (openshell=False) - or a numpy 3darray containing the density and spin density matrices (openshell=True). + numpy.ndarray: 2D array containing density matrix (openshell=False) or + 3D array containing density and spin density matrices (openshell=True). + + Raises: + RuntimeError: If density matrix reordering is compromised for def2 basis with 3d elements. """ path = directory+'/'+basename @@ -104,20 +105,23 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re def _parse_gbw(fname): - """ Parse ORCA .gbw files. + """Parse ORCA .gbw files. Many thanks to https://pysisyphus.readthedocs.io/en/latest/_modules/pysisyphus/calculators/ORCA.html Args: - fname (str): path to the gbw file. + fname (str): Path to the gbw file. Returns: - numpy 3darray of (s,nao,nao) containing the density matrix - numpy 2darray of (s,nao) containing the MO energies - numpy 2darray of (s,nao) containing the MO occupation numbers - dict of {int : [int]} with a list of basis functions angular momenta - for each atom (not for element!) + tuple: A tuple containing: + - coefficients_ab (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. + - energies_ab (numpy.ndarray): 2D array of shape (s,nao) with MO energies. 
+ - occupations_ab (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. + - ls (dict): Dictionary mapping atom index to list of basis function angular momenta. + + Raises: + RuntimeError: If number of MO sets is not 1 or 2. """ def read_array(f, n, dtype): @@ -177,18 +181,20 @@ def read_basis(MAX_PRIMITIVES=37): def _get_indices(mol, ls_from_orca): - """ Get coefficient needed to reorder the AO read from Orca. + """Get coefficients needed to reorder the AO read from Orca. Args: - mol (pyscf Mole): pyscf Mole object. - ls_from_orca : dict of {int : [int]} with a list of basis functions - angular momenta for those atoms (not elements!) - whose basis functions are *not* sorted wrt to angular momenta. - The lists represent the Orca order. + mol (pyscf.gto.Mole): pyscf Mole object. + ls_from_orca (dict): Dictionary mapping atom index to list of basis function angular momenta + for atoms whose basis functions are NOT sorted wrt angular momenta. + The lists represent the Orca order. Returns: - numpy int 1darray of (nao,) containing the indices to be used as - c_reordered = c_orca[indices] + numpy.ndarray: 1D integer array of shape (nao,) containing reordering indices. + Use as: c_reordered = c_orca[indices] + + Raises: + RuntimeError: If AO reordering fails. """ if ls_from_orca is None: return None @@ -210,18 +216,14 @@ def _get_indices(mol, ls_from_orca): def reorder_coeff_inplace(c_full, mol, reorder_dest='pyscf', ls_from_orca=None): - """ Reorder coefficient read from ORCA .gbw + """Reorder coefficients read from ORCA .gbw in-place. Args: - c_full : numpy 3darray of (s,nao,nao) containing the MO coefficients - to reorder - mol (pyscf Mole): pyscf Mole object. - Kwargs: - reorder_dest (str): Which AO ordering convention to use. - ls_from_orca : dict of {int : [int]} with a list of basis functions - angular momenta for those atoms (not elements!) - whose basis functions are *not* sorted wrt to angular momenta. - The lists represent the Orca order. 
+ c_full (numpy.ndarray): 3D array of shape (s,nao,nao) containing MO coefficients to reorder. + mol (pyscf.gto.Mole): pyscf Mole object. + reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. + ls_from_orca (dict): Dictionary mapping atom index to list of basis function angular momenta + for atoms whose basis functions are NOT sorted wrt angular momenta. Defaults to None. """ def _reorder_coeff(c): # In ORCA, at least def2-SVP and def2-TZVP for 3d metals @@ -242,19 +244,22 @@ def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): Limited for Orca version 4.0 (cannot read the basis set). Args: - mol (pyscf Mole): pyscf Mole object. - fname (str): path to the gbw file. - Kwargs: - reorder_dest (str): Which AO ordering convention to use. - sort_l (bool): if sort the basis functions wrt angular momenta. - e.g. PySCF requires them sorted. + mol (pyscf.gto.Mole): pyscf Mole object. + fname (str): Path to the gbw file. + reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. + sort_l (bool): Whether to sort basis functions wrt angular momenta. + PySCF requires them sorted. Defaults to True. Returns: - numpy 3darray of (s,nao,nao) containing the MO coefficients - numpy 2darray of (s,nao) containing the MO energies - numpy 2darray of (s,nao) containing the MO occupation numbers - s is 1 for closed-shell and 2 for open-shell computation. - nao is number of atomic/molecular orbitals. + tuple: A tuple containing: + - c (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. + - e (numpy.ndarray): 2D array of shape (s,nao) with MO energies. + - occ (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. + Where s is 1 for closed-shell and 2 for open-shell computation, + and nao is the number of atomic/molecular orbitals. + + Raises: + RuntimeError: If basis set information not found and sort_l=True. 
""" c, e, occ, ls = _parse_gbw(fname) if not ls and sort_l: diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 0d5e1763..0fa954f0 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -8,6 +8,14 @@ def get_bags(unique_ncharges): + """Generates all unique element pair combinations including self-interactions. + + Args: + unique_ncharges (array-like): Array of unique atomic charges/numbers. + + Returns: + list: List of all unique element pairs [Z_i, Z_j] including self-interactions. + """ combs = list(itertools.combinations(unique_ncharges, r=2)) combs = [list(x) for x in combs] # add self interaction @@ -17,12 +25,29 @@ def get_bags(unique_ncharges): def get_mu_sigma(R): + """Computes Gaussian distribution parameters from interatomic distance. + + Args: + R (float): Interatomic distance. + + Returns: + tuple: Mean (mu) and standard deviation (sigma) for the Gaussian distribution. + """ mu = R * 0.5 sigma = R * 0.125 return mu, sigma def get_gaussian(x, R): + """Computes Gaussian function values for a given interatomic distance. + + Args: + x (numpy ndarray): Grid points to evaluate the Gaussian. + R (float): Interatomic distance determining the Gaussian parameters. + + Returns: + numpy ndarray: Gaussian function values at the grid points. + """ mu, sigma = get_mu_sigma(R) X = (x-mu) / (sigma*np.sqrt(2)) g = np.exp(-X**2) / (np.sqrt(2*np.pi) * sigma) @@ -30,6 +55,17 @@ def get_gaussian(x, R): def get_skew_gaussian_l_both(x, R, Z_I, Z_J): + """Computes skewed Gaussian distributions for local B2R2 representation. + + Args: + x (numpy ndarray): Grid points to evaluate the functions. + R (float): Interatomic distance. + Z_I (int): Atomic number of atom I. + Z_J (int): Atomic number of atom J. + + Returns: + tuple: Two skewed Gaussian distributions (a, b) for the atom pair. 
+ """ mu, sigma = get_mu_sigma(R) # a = Z_J * scipy.stats.skewnorm.pdf(x, Z_J, mu, sigma) # b = Z_I * scipy.stats.skewnorm.pdf(x, Z_I, mu, sigma) @@ -45,6 +81,17 @@ def get_skew_gaussian_l_both(x, R, Z_I, Z_J): def get_skew_gaussian_n_both(x, R, Z_I, Z_J): + """Computes combined skewed Gaussian distribution for nuclear B2R2 representation. + + Args: + x (numpy ndarray): Grid points to evaluate the function. + R (float): Interatomic distance. + Z_I (int): Atomic number of atom I. + Z_J (int): Atomic number of atom J. + + Returns: + numpy ndarray: Combined skewed Gaussian distribution for the atom pair. + """ mu, sigma = get_mu_sigma(R) # a = Z_I * scipy.stats.skewnorm.pdf(x, Z_J, mu, sigma) # b = Z_J * scipy.stats.skewnorm.pdf(x, Z_I, mu, sigma) diff --git a/qstack/qml/slatm.py b/qstack/qml/slatm.py index 11997fd4..9bef6b2e 100644 --- a/qstack/qml/slatm.py +++ b/qstack/qml/slatm.py @@ -7,6 +7,18 @@ def get_mbtypes(qs, qml=False): + """Generates many-body types (elements, pairs, triples) for SLATM representation. + + Args: + qs (list): List of atomic number arrays for all molecules. + qml (bool): If True, uses set ordering (QML-compatible). If False, uses sorted ordering. Defaults to False. + + Returns: + dict: Dictionary with keys 1, 2, 3 containing: + - 1: Array of unique elements + - 2: List of element pairs (including self-pairs) + - 3: List of valid element triples + """ # all the elements elements = itertools.chain.from_iterable(list(i) for i in qs) @@ -33,6 +45,14 @@ def get_mbtypes(qs, qml=False): def pad_zeros(slatm): + """Pads SLATM representations with zeros to have uniform size. + + Args: + slatm (list): List of SLATM representation arrays with potentially different sizes. + + Returns: + list: List of zero-padded SLATM arrays with uniform size. 
+ """ n_features = np.array([x.shape[-1] for x in slatm]) pad_sizes = max(n_features)-n_features for i in range(len(slatm)): diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index 71d80240..0ce74d0b 100644 --- a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -52,6 +52,7 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, def main(): + """Command-line entry point for computing kernel matrix condition numbers.""" parser = RegressionParser(description='This program computes the condition number for the kernel matrix.', hyperparameters_set='single') parser.remove_argument('prop') parser.remove_argument('train_size') diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index 3c77150e..b11b4288 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -85,6 +85,7 @@ def cv_results(X, y, def main(): + """Command-line entry point for full cross-validation with hyperparameter search.""" parser = RegressionParser(description='This program runs a full cross-validation of the learning curves (hyperparameters search included).', hyperparameters_set='array') parser.remove_argument('random_state') parser.add_argument('--n', type=int, dest='n_rep', default=defaults.n_rep, help='the number of repetition for each point') diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 246167c7..68316be0 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -71,6 +71,7 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, def main(): + """Command-line entry point for computing final prediction errors.""" parser = RegressionParser(description='This program computes the full-training error for each molecule.', hyperparameters_set='single') parser.remove_argument('train_size') parser.add_argument('--save-alpha', type=str, 
dest='save_alpha', default=None, help='file to write the regression coefficients to') diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index c94339ee..2ae4cebb 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -5,10 +5,18 @@ def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): - """ - - .. todo:: - Write the docstring + """Computes global kernel matrix between two sets of molecular representations. + + Args: + X (list): List of molecular representations (first set). + Y (list): List of molecular representations (second set). + sigma (float): Kernel width parameter. + local_kernel (callable): Local kernel function for atomic environments. + global_kernel (callable): Global kernel function for combining local kernels. + options (dict): Dictionary of kernel options (normalize, verbose, etc.). + + Returns: + numpy ndarray: Global kernel matrix of shape (len(X), len(Y)). """ self = (Y is X) verbose = options.get('verbose', 0) @@ -54,10 +62,19 @@ def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None): - """ - - .. todo:: - Write the docstring + """Computes the covariance matrix between two molecules using local kernels. + + Args: + mol1 (dict): First molecule represented as dictionary of atomic environments by species. + mol2 (dict): Second molecule represented as dictionary of atomic environments by species. + species (numpy ndarray): Array of unique atomic species present. + max_atoms (dict): Maximum number of atoms per species across all molecules. + max_size (int): Total size of the padded covariance matrix. + kernel (callable): Local kernel function. + sigma (float, optional): Kernel width parameter. Defaults to None. + + Returns: + numpy ndarray: Covariance matrix of shape (max_size, max_size). 
""" K_covar = np.zeros((max_size, max_size)) idx = 0 @@ -76,10 +93,16 @@ def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None) def normalize_kernel(kernel, self_x=None, self_y=None, verbose=0): - """ + """Normalizes a kernel matrix using self-kernel values. + + Args: + kernel (numpy ndarray): Kernel matrix to normalize. + self_x (numpy ndarray, optional): Self-kernel values for X. If None, extracted from diagonal. Defaults to None. + self_y (numpy ndarray, optional): Self-kernel values for Y. If None, extracted from diagonal. Defaults to None. + verbose (int): Verbosity level. Defaults to 0. - .. todo:: - Write the docstring + Returns: + numpy ndarray: Normalized kernel matrix. """ if verbose: print("Normalizing kernel.") @@ -91,10 +114,14 @@ def normalize_kernel(kernel, self_x=None, self_y=None, verbose=0): def mol_to_dict(mol, species): - """ + """Converts molecular representation to a dictionary organized by atomic species. + + Args: + mol (numpy ndarray): Molecular representation where each row is [atomic_number, features...]. + species (numpy ndarray): Array of unique atomic species. - .. todo:: - Write the docstring + Returns: + dict: Dictionary mapping atomic numbers to arrays of atomic feature vectors. """ mol_dict = {q:[] for q in species} @@ -106,23 +133,41 @@ def mol_to_dict(mol, species): def sumsq(x): + """Computes sum of squares (dot product with itself). + + Args: + x (numpy ndarray): Input vector. + + Returns: + float: Sum of squared elements. + """ return x@x def avg_kernel(kernel, _options): - """ + """Computes the average kernel value. - .. todo:: - Write the docstring + Args: + kernel (numpy ndarray): Kernel matrix. + _options (dict): Options dictionary (unused). + + Returns: + float: Average of all kernel matrix elements. """ return np.sum(kernel) / math.prod(kernel.shape) def rematch_kernel(kernel, options): - """ + """Computes the REMatch (Regularized Entropy Match) kernel. 
+ + Uses Sinkhorn algorithm to compute optimal transport-based kernel similarity. + + Args: + kernel (numpy ndarray): Local kernel matrix. + options (dict): Options dictionary containing 'alpha' parameter for regularization. - .. todo:: - Write the docstring + Returns: + float: REMatch kernel value. """ alpha = options['alpha'] thresh = 1e-6 diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 61dc517f..ec115825 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -132,6 +132,7 @@ def hyper_loop(sigma, eta): def main(): + """Command-line entry point for hyperparameter optimization.""" parser = RegressionParser(description='This program finds the optimal hyperparameters.', hyperparameters_set='array') parser.remove_argument("random_state") parser.remove_argument("train_size") diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index 7be898dd..c58c0438 100644 --- a/qstack/regression/kernel.py +++ b/qstack/regression/kernel.py @@ -27,6 +27,7 @@ def kernel(X, Y=None, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=def def main(): + """Command-line entry point for computing kernel matrices.""" parser = RegressionParser(description='This program computes kernel.', hyperparameters_set='single') parser.remove_argument('prop') parser.remove_argument('test_size') diff --git a/qstack/regression/kernel_utils.py b/qstack/regression/kernel_utils.py index 2e24817c..42ff4a72 100644 --- a/qstack/regression/kernel_utils.py +++ b/qstack/regression/kernel_utils.py @@ -41,16 +41,22 @@ def __call__(self, _parser, namespace, values, _option_string=None): def get_local_kernel(arg): - """ Obtains a local-envronment kernel by name. + """Obtains a local-environment kernel function by name. Args: - arg (str): the name of the kernel, in [''] # TODO + arg (str): Kernel name. 
Available options include: + - 'G': Gaussian (RBF) kernel + - 'L': Laplacian kernel + - 'dot': Linear (dot product) kernel + - 'cosine': Cosine similarity kernel + - Implementation-specific variants: 'G_sklearn', 'G_custom_c', 'L_sklearn', 'L_custom_c', 'L_custom_py' Returns: - kernel (Callable[np.ndarray,np.ndarray,float -> np.ndarray]): the actual kernel function, to call as ``K = kernel(X,Y,gamma)`` + callable: Kernel function with signature kernel(X, Y, gamma) -> numpy.ndarray. - .. todo:: - Write the docstring + Raises: + NotImplementedError: If the specified kernel is not implemented. + RuntimeError: If the kernel implementation is not available (e.g., C library missing). """ if arg not in local_kernels_dict: raise NotImplementedError(f'{arg} kernel is not implemented') @@ -62,10 +68,17 @@ def get_local_kernel(arg): def get_global_kernel(arg, local_kernel): - """ + """Creates a global kernel function from a local kernel. + + Args: + arg (tuple): Tuple of (gkernel_name, options_dict). + local_kernel (callable): Local kernel function. - .. todo:: - Write the docstring + Returns: + callable: Global kernel function that combines local kernels. + + Raises: + NotImplementedError: If the specified global kernel is not implemented. """ gkernel, options = arg @@ -76,10 +89,14 @@ def get_global_kernel(arg, local_kernel): def get_kernel(arg, arg2=None): - """ Returns the kernel function depending on the cli argument + """Returns the appropriate kernel function based on arguments. + + Args: + arg (str): Local kernel name. + arg2 (tuple, optional): If provided, tuple of (global_kernel_name, options) for global kernel. Defaults to None. - .. todo:: - Write the docstring + Returns: + callable: Kernel function (local or global). 
""" local_kernel = get_local_kernel(arg) diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index b2d177c2..9ab85075 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -6,10 +6,20 @@ def custom_laplacian_kernel(X, Y, gamma): - """ Compute Laplacian kernel between X and Y + """Computes Laplacian kernel between X and Y using Python implementation. - .. todo:: - Write the docstring + K(x, y) = exp(-gamma * ||x - y||_1) + + Args: + X (numpy ndarray): First set of samples (can be multi-dimensional). + Y (numpy ndarray): Second set of samples. + gamma (float): Kernel width parameter. + + Returns: + numpy ndarray: Laplacian kernel matrix of shape (len(X), len(Y)). + + Raises: + RuntimeError: If X and Y have incompatible shapes. """ if X.shape[1:] != Y.shape[1:]: raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") @@ -27,10 +37,14 @@ def cdist(X, Y): def custom_C_kernels(kernel_function, return_distance_function=False): - """ + """Creates kernel function wrappers using C implementation for speed. + + Args: + kernel_function (str): Kernel type ('L' for Laplacian, 'G' for Gaussian). + return_distance_function (bool): If True, returns distance function instead of kernel. Defaults to False. - .. todo:: - Write the docstring + Returns: + callable or None: Kernel or distance function, or None if C library cannot be loaded. """ import ctypes import sysconfig @@ -79,10 +93,32 @@ def kernel_func_c(X, Y, gamma): def dot_kernel_wrapper(x, y, *_kargs, **_kwargs): + """Wrapper for linear (dot product) kernel. + + Args: + x (numpy ndarray): First set of samples. + y (numpy ndarray): Second set of samples. + *_kargs: Unused positional arguments (for compatibility). + **_kwargs: Unused keyword arguments (for compatibility). + + Returns: + numpy ndarray: Linear kernel matrix. 
+ """ return _SKLEARN_PAIRWISE.linear_kernel(x, y) def cosine_similarity_wrapper(x, y, *_kargs, **_kwargs): + """Wrapper for cosine similarity kernel. + + Args: + x (numpy ndarray): First set of samples. + y (numpy ndarray): Second set of samples. + *_kargs: Unused positional arguments (for compatibility). + **_kwargs: Unused keyword arguments (for compatibility). + + Returns: + numpy ndarray: Cosine similarity matrix. + """ return _SKLEARN_PAIRWISE.cosine_similarity(x, y) diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 7083b414..1341a049 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -43,6 +43,7 @@ def oos(X, X_oos, alpha, sigma=defaults.sigma, def main(): + """Command-line entry point for out-of-sample predictions.""" parser = RegressionParser(description='This program makes prediction for OOS.', hyperparameters_set='single') parser.remove_argument('prop') parser.remove_argument('train_size') diff --git a/qstack/regression/parser.py b/qstack/regression/parser.py index e46fc3cf..9fdc5cb5 100644 --- a/qstack/regression/parser.py +++ b/qstack/regression/parser.py @@ -2,6 +2,46 @@ from .kernel_utils import defaults, ParseKwargs, local_kernels_dict, global_kernels_dict class RegressionParser(argparse.ArgumentParser): + """Custom argument parser for kernel ridge regression tasks. + + Provides pre-configured argument sets for machine learning workflows with + molecular representations. Supports single hyperparameter evaluation and + hyperparameter optimization via grid search. + + Args: + hyperparameters_set (str, optional): Hyperparameter mode. Options: + - None: No hyperparameter arguments added + - 'single': Single eta/sigma values for direct regression + - 'array': Multiple eta/sigma values for grid search/cross-validation + Defaults to None. + **kwargs: Additional arguments passed to ArgumentParser. 
+ + Attributes: + Standard arguments added for all modes: + - x (--x): Path to molecular representations file + - y (--y): Path to target properties file + - akernel (--akernel): Local/atomic kernel type (Gaussian, Laplacian, etc.) + - gkernel (--gkernel): Global/molecular kernel type (avg, REMatch) + - gdict (--gdict): Global kernel parameters dictionary + - test (--test): Test set fraction (0.0-1.0) + - train (--train): Training set size(s) + - ll (--ll): Thread correction flag + - readkernel (--readkernel): Flag if input is pre-computed kernel + - sparse (--sparse): Sparse learning basis size + - random_state (--random_state): Random seed for reproducibility + + Additional for 'single' mode: + - eta (--eta): Single regularization parameter + - sigma (--sigma): Single kernel width parameter + + Additional for 'array' mode: + - eta (--eta): Array of regularization parameters + - sigma (--sigma): Array of kernel width parameters + - splits (--splits): Number of k-fold cross-validation splits + - print (--print): Verbosity level + - ada (--ada): Adaptive sigma flag + - name (--name): Output filename + """ def __init__(self, hyperparameters_set=None, **kwargs): super().__init__( formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -37,6 +77,23 @@ def __init__(self, hyperparameters_set=None, **kwargs): def remove_argument(parser, arg): + """Removes an argument from the parser. + + Utility method for customizing parsers by removing unwanted arguments + from the pre-configured set. Useful when deriving specialized parsers. + + Args: + arg (str): Argument name (with or without dashes, e.g., '--x' or 'x') + or destination name (e.g., 'repr'). + + Returns: + None: Modifies parser in place. 
+ + Example: + >>> parser = RegressionParser(hyperparameters_set='single') + >>> parser.remove_argument('--sparse') + >>> # sparse argument is now removed + """ for action in parser._actions: opts = action.option_strings if (opts and opts[0] == arg) or action.dest == arg: diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index 24b83f59..f8a12823 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -86,6 +86,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, def main(): + """Command-line entry point for computing learning curves.""" parser = RegressionParser(description='This program computes the learning curve.', hyperparameters_set='single') parser.add_argument('--splits', type=int, dest='splits', default=defaults.n_rep, help='number of splits') parser.add_argument('--name', type=str, dest='nameout', default=None, help='the name of the output file containting the LC data (.txt)') diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index cf50f083..4f8f1f5b 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -12,6 +12,14 @@ def __init__(self, fname=None, parameters='HF'): self.get_basis(fname, parameters) def renormalize(self, a): + """Computes renormalization factor for Gaussian basis functions. + + Args: + a (float): Gaussian exponent. + + Returns: + float: Renormalization factor (0.5*a/pi)^(3/4). + """ # 1/norm1 = \int \exp(-a*r^2) d^3 r => norm1 = (a/pi)^(3/2) # 1/norm2^2 = \int (\exp(-a*r^2))^2 d^3 r => norm2 = (2.0*a/pi)^(3/4) # coefficient = norm1 / norm2 = (0.5*a/pi)^(3/4) @@ -19,6 +27,14 @@ def renormalize(self, a): return x*x*x def read_ac(self, fname): + """Reads auxiliary basis parameters from file. + + Args: + fname (str, optional): Path to parameter file. If None, uses default. + + Returns: + dict: Dictionary mapping element symbols to basis function parameters. 
+ """ if fname is None: fname = self.acfile_default with open(fname) as f: @@ -37,6 +53,14 @@ def read_ac(self, fname): return basis def add_caps(self, basis): + """Adds cap (diffuse) functions to the auxiliary basis. + + Args: + basis (dict): Basis set dictionary to modify. + + Returns: + dict: Modified basis set with cap functions added. + """ caps_array = np.zeros(103) caps_array [ 1 : 2 +1] = 1.0 / 3.0 caps_array [ 3 : 4 +1] = 1.0 / 16.0 @@ -60,6 +84,12 @@ def add_caps(self, basis): return basis def get_basis(self, fname, parameters): + """Initializes auxiliary basis set from file or predefined parameters. + + Args: + fname (str, optional): Path to custom parameter file. + parameters (str): Parameter set to use ('HF', 'HFS', or None for custom file). + """ if not parameters: acbasis = self.read_ac(fname) self.add_caps(acbasis) @@ -279,6 +309,14 @@ def get_basis(self, fname, parameters): def use_charge(self, mol): + """Adjusts basis coefficients based on molecular charge. + + Args: + mol (pyscf Mole): pyscf Mole object. + + Returns: + dict: Adjusted auxiliary basis set. + """ if self.parameters == 'HF': acbasis = copy.deepcopy(self.acbasis) factor = 1.0-mol.charge/mol.natm @@ -289,6 +327,15 @@ def use_charge(self, mol): return acbasis def use_ecp(self, mol, acbasis): + """Adjusts basis set to account for effective core potentials (ECP). + + Args: + mol (pyscf Mole): pyscf Mole object with ECP. + acbasis (dict): Auxiliary basis set dictionary. + + Returns: + dict: Adjusted auxiliary basis set accounting for ECP. + """ acbasis = copy.deepcopy(acbasis) q_cleaned = set() for iat, z in enumerate(mol.atom_charges()): @@ -318,6 +365,14 @@ def use_ecp(self, mol, acbasis): return acbasis def get_auxweights(self, auxmol): + """Extracts auxiliary basis weights from auxiliary molecule object. + + Args: + auxmol (pyscf Mole): Auxiliary molecule object. + + Returns: + numpy ndarray: Array of auxiliary basis function weights. 
+ """ w = np.zeros(auxmol.nao) iao = 0 for iat in range(auxmol.natm): @@ -328,9 +383,27 @@ def get_auxweights(self, auxmol): return w def merge_caps(self, w, eri3c): + """Contracts 3-center integrals with auxiliary basis weights. + + Args: + w (numpy ndarray): Auxiliary basis weights. + eri3c (numpy ndarray): 3-center electron repulsion integrals. + + Returns: + numpy ndarray: Contracted integrals. + """ return np.einsum('...i,i->...', eri3c, w) def get_eri3c(self, mol, auxmol): + """Computes 3-center electron repulsion integrals. + + Args: + mol (pyscf Mole): Main molecule object. + auxmol (pyscf Mole): Auxiliary molecule object. + + Returns: + numpy ndarray: 3-center ERIs (ij|P) where i,j are AO indices and P is aux basis index. + """ pmol = mol + auxmol shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) eri3c = pmol.intor('int3c2e_sph', shls_slice=shls_slice) diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 602895cb..2576e46a 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -4,19 +4,28 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): - """ Compute the guess Hamiltonian orbitals + """Computes molecular orbitals from a guess Hamiltonian with optional electric field. + + Solves the generalized eigenvalue problem for the guess Hamiltonian, optionally + including an external uniform electric field perturbation. Args: mol (pyscf Mole): pyscf Mole object. - guess (func): Method used to compute the guess Hamiltonian. Output of get_guess. - xc (str): Exchange-correlation functional. Defaults to pbe. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. - return_ao_dip (bool): if return computed AO dipole integrals + guess (callable or str): Guess Hamiltonian method function (from get_guess) or 'huckel'. + xc (str): Exchange-correlation functional name. Defaults to 'pbe'. 
+ field (numpy ndarray, optional): 3-component uniform electric field vector (∇φ) in atomic units. + Defaults to None. + return_ao_dip (bool): If True, also returns AO dipole integrals. Defaults to False. Returns: - 1D numpy array containing the eigenvalues - 2D numpy array containing the eigenvectors of the guess Hamiltonian. - (optional) 2D numpy array with the AO dipole integrals + tuple: Depending on return_ao_dip: + - If False: (e, v) where: + - e (numpy ndarray): 1D array of orbital eigenvalues + - v (numpy ndarray): 2D array (nao, nao) of MO coefficients + - If True: (e, v, ao_dip) where ao_dip is 3D array (3, nao, nao) of AO dipole integrals + + Raises: + NotImplementedError: If field is specified with Hückel guess. """ if guess == 'huckel': if field is not None: @@ -38,14 +47,20 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): def ext_field_generator(mol, field): - """ Generator for Hext (i.e. applied uniform electiric field interaction) gradient + """Generates external electric field Hamiltonian gradient function. + + Creates a function that computes derivatives of the external field interaction + Hamiltonian (H_ext) with respect to nuclear coordinates for each atom. Args: mol (pyscf Mole): pyscf Mole object. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + field (numpy ndarray or None): 3-component uniform electric field vector (∇φ) in atomic units. + If None, treated as zero field. Returns: - func(int: iat): returns the derivative of Hext wrt the coordinates of atom iat, i.e. dHext/dr[iat] + callable: Function field_deriv(iat) that takes atom index and returns + 3D array (3, nao, nao) of dH_ext/dr[iat] - external field Hamiltonian + gradient for atom iat. 
""" shls_slice = (0, mol.nbas, 0, mol.nbas) @@ -65,17 +80,24 @@ def field_deriv(iat): def get_guess_orbitals_grad(mol, guess, field=None): - """ Compute the guess Hamiltonian eigenvalues and their derivatives + """Computes guess Hamiltonian eigenvalues and their nuclear/field gradients. + + Calculates orbital energies and their derivatives with respect to both nuclear + coordinates (for geometry optimization/force calculations) and electric field + (for response properties). Args: mol (pyscf Mole): pyscf Mole object. - guess (func): Tuple of methods used to compute the guess Hamiltonian and its eigenvalue derivatives. Output of get_guess_g - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + guess (tuple): Pair (hamiltonian_func, gradient_func) from get_guess_g(). + field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - numpy 1d array (mol.nao,): eigenvalues - numpy 3d ndarray (mol.nao,mol.natm,3): gradient of the eigenvalues in Eh/bohr - numpy 2d ndarray (mol.nao,3): derivative of the eigenvalues wrt field in Eh/a.u. + tuple: (e, de_dr, de_dfield) where: + - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues in Eh + - de_dr (numpy ndarray): 3D array (nao, natm, 3) of eigenvalue gradients in Eh/bohr + - de_dfield (numpy ndarray or None): 2D array (nao, 3) of eigenvalue derivatives + w.r.t. electric field in Eh/a.u., or None if field is None """ e, c, ao_dip = get_guess_orbitals(mol, guess[0], field=field, return_ao_dip=True) @@ -94,32 +116,46 @@ def get_guess_orbitals_grad(mol, guess, field=None): def get_guess_dm(mol, guess, xc="pbe", openshell=None, field=None): - """ Compute the density matrix with the guess Hamiltonian. + """Computes density matrix from guess Hamiltonian orbitals. + + Constructs the density matrix by occupying the lowest energy orbitals from + the guess Hamiltonian according to the aufbau principle. 
Args: mol (pyscf Mole): pyscf Mole object. - guess (func): Method used to compute the guess Hamiltonian. Output of get_guess. - xc (str): Exchange-correlation functional. Defaults to pbe - openshell (bool): . Defaults to None. + guess (callable): Guess Hamiltonian method function from get_guess(). + xc (str): Exchange-correlation functional name. Defaults to 'pbe'. + openshell (bool, optional): If not None, forces open-shell treatment even for closed-shell systems. + Defaults to None. + field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - A numpy ndarray containing the density matrix computed using the guess Hamiltonian. + numpy ndarray: Density matrix in AO basis. + - Closed-shell: 2D array (nao, nao) + - Open-shell: 3D array (2, nao, nao) for alpha and beta separately """ _e, v = get_guess_orbitals(mol, guess, xc, field=field) return get_dm(v, mol.nelec, mol.spin if mol.spin>0 or openshell is not None else None) def get_spahm_representation(mol, guess_in, xc="pbe", field=None): - """ Compute the SPAHM representation. + """Computes the SPAHM (SPectrum of Approximated Hamiltonian Matrices) molecular representation. + + The SPAHM representation consists of the occupied orbital energies from a guess + Hamiltonian, providing a fast quantum-mechanical descriptor for machine learning. Args: mol (pyscf Mole): pyscf Mole object. - guess_in (str): Method used to obtain the guess Hamiltoninan. - xc (str): Exchange-correlation functional. Defaults to pbe. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + guess_in (str): Guess method name (e.g., 'LB', 'SAD', 'core', 'GWH'). + xc (str): Exchange-correlation functional name. Defaults to 'pbe'. + field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - A numpy ndarray containing the SPAHM representation.
+ numpy ndarray: SPAHM representation consisting of occupied orbital eigenvalues. + - Closed-shell: 1D array of shape (n_occupied,) in Eh + - Open-shell: 2D array of shape (2, n_alpha/n_beta) for alpha and beta orbitals """ guess = get_guess(guess_in) e, _v = get_guess_orbitals(mol, guess, xc, field=field) @@ -128,17 +164,25 @@ def get_spahm_representation(mol, guess_in, xc="pbe", field=None): def get_spahm_representation_grad(mol, guess_in, field=None): - """ Compute the SPAHM representation and its gradient + """Computes SPAHM representation and its nuclear/field gradients for force/response calculations. + + Calculates the SPAHM descriptor (occupied orbital energies) along with derivatives + needed for molecular dynamics, geometry optimization, and response properties. Args: mol (pyscf Mole): pyscf Mole object. - guess_in (str): Method used to obtain the guess Hamiltoninan. - field (numpy.array(3)): applied uniform electric field i.e. $\\vec \\nabla \\phi$ in a.u. + guess_in (str): Guess method name with gradient support ('core' or 'lb'). + field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. + Defaults to None. Returns: - numpy 1d array (occ,): the SPAHM representation (Eh). - numpy 3d array (occ,mol.natm,3): gradient of the representation (Eh/bohr) - numpy 2d array (occ,3): gradient of the representation wrt electric field (Eh/a.u.) + tuple: (spahm, spahm_grad, spahm_field_grad) where: + - spahm (numpy ndarray): SPAHM representation - occupied orbital energies in Eh. + Shape: (n_occ,) for closed-shell or (2, n_alpha/n_beta) for open-shell + - spahm_grad (numpy ndarray): Nuclear gradients of SPAHM in Eh/bohr. + Shape: (n_occ, natm, 3) or (2, n_alpha/n_beta, natm, 3) + - spahm_field_grad (numpy ndarray or None): Electric field gradients in Eh/a.u. 
+ Shape: (n_occ, 3) or (2, n_alpha/n_beta, 3), or None if field is None """ guess = get_guess_g(guess_in) e, agrad, fgrad = get_guess_orbitals_grad(mol, guess, field=field) diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index 9e6e2763..29c102d6 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -6,24 +6,29 @@ def hcore(mol, *_): - """Uses the core potential (kin + nuc + ecp) to compute the guess Hamiltonian. + """Computes guess Hamiltonian from core contributions (kinetic + nuclear + ECP). Args: mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - A numpy ndarray containing the computed approximate Hamiltonian. + numpy ndarray: 2D array containing the core Hamiltonian matrix in AO basis. """ return scf.hf.get_hcore(mol) def GWH(mol, *_): - """Uses the generalized Wolfsberg-Helmholtz to compute the guess Hamiltonian. + """Computes guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. + + Uses the empirical formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij + where K = 1.75 (from J. Chem. Phys. 1952, 20, 837). Args: mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - A numpy ndarray containing the computed approximate Hamiltonian. + numpy ndarray: 2D GWH Hamiltonian matrix in AO basis. """ h = hcore(mol) S = mol.intor_symmetric('int1e_ovlp') @@ -38,14 +43,20 @@ def GWH(mol, *_): return h_gwh def SAD(mol, func): - """Uses the superposition of atomic densities to compute the guess Hamiltonian. + """Computes guess Hamiltonian using Superposition of Atomic Densities (SAD). + + Constructs the Fock matrix from atomic Hartree-Fock density matrices + summed together as an initial guess for molecular calculations. Args: mol (pyscf Mole): pyscf Mole object. - func (str): Exchange-correlation functional. + func (str): Exchange-correlation functional name (e.g., 'pbe', 'b3lyp'). 
Returns: - A numpy ndarray containing the computed approximate Hamiltonian. + numpy ndarray: 2D Fock matrix in AO basis computed from SAD. + + Warns: + RuntimeWarning: If alpha and beta effective potentials differ for the functional. """ hc = hcore(mol) dm = scf.hf.init_guess_by_atom(mol) @@ -62,13 +73,16 @@ def SAD(mol, func): return fock def SAP(mol, *_): - """Uses the superposition of atomic potentials to compute the guess Hamiltonian. + """Computes guess Hamiltonian using Superposition of Atomic Potentials (SAP). + + Constructs initial Hamiltonian from kinetic energy plus summed atomic potentials. Args: mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - A numpy ndarray containing the computed approximate Hamiltonian. + numpy ndarray: 2D Hamiltonian matrix (T + V_SAP) in AO basis. """ mf = dft.RKS(mol) vsap = mf.get_vsap() @@ -77,46 +91,67 @@ def SAP(mol, *_): return fock def LB(mol, *_): - """Uses the Laikov-Briling model with HF-based parameters to compute the guess Hamiltonian. + """Computes guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. + + Uses auxiliary basis representation optimized for Hartree-Fock calculations. Args: mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - A numpy ndarray containing the computed approximate Hamiltonian. + numpy ndarray: 2D effective Hamiltonian matrix from LB2020 model in AO basis. """ return LB20(parameters='HF').Heff(mol) def LB_HFS(mol, *_): - """ Laikov-Briling using HFS-based parameters + """Computes guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. + + Uses auxiliary basis representation optimized for Hartree-Fock-Slater calculations. Args: mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - A numpy ndarray containing the computed approximate Hamiltonian. 
+ numpy ndarray: 2D effective Hamiltonian matrix from LB2020-HFS model in AO basis. """ return LB20(parameters='HFS').Heff(mol) def solveF(mol, fock): - """Computes the eigenvalues and eigenvectors corresponding to the given Hamiltonian. + """Solves generalized eigenvalue problem FC = SCε for the Fock/Hamiltonian matrix. Args: mol (pyscf Mole): pyscf Mole object. - fock (numpy ndarray): Approximate Hamiltonian. + fock (numpy ndarray): 2D Fock or Hamiltonian matrix in AO basis. + + Returns: + tuple: (eigenvalues, eigenvectors) where: + - eigenvalues: 1D array of orbital energies + - eigenvectors: 2D array of MO coefficients (columns are MOs) """ s1e = mol.intor_symmetric('int1e_ovlp') return scipy.linalg.eigh(fock, s1e) def get_guess(arg): - """Returns the function of the method selected to compute the approximate hamiltoninan + """Returns guess Hamiltonian function by name. Args: - arg (str): Approximate Hamiltonian + arg (str): Guess method name. Available options: + - 'core': Core Hamiltonian (H_core) + - 'sad': Superposition of Atomic Densities + - 'sap': Superposition of Atomic Potentials + - 'gwh': Generalized Wolfsberg-Helmholtz + - 'lb': Laikov-Briling 2020 (HF parameters) + - 'lb-hfs': Laikov-Briling 2020 (HFS parameters) + - 'huckel': Extended Hückel method Returns: - The function of the selected method. + callable: Guess Hamiltonian function with signature f(mol, xc) -> numpy.ndarray. + + Raises: + RuntimeError: If the specified guess method is not available. """ arg = arg.lower() if arg not in guesses_dict: @@ -125,8 +160,18 @@ def get_guess(arg): def check_nelec(nelec, nao): - """ Checks if there is enough orbitals - for the electrons""" + """Validates that the number of electrons can be accommodated by available orbitals. + + Args: + nelec (tuple or int): Number of electrons (alpha, beta) or total. + nao (int): Number of atomic orbitals. + + Raises: + RuntimeError: If there are more electrons than available orbitals. 
+ + Warns: + RuntimeWarning: If all orbitals are filled (complete shell warning). + """ if np.any(np.array(nelec) > nao): raise RuntimeError(f'Too many electrons ({nelec}) for {nao} orbitals') elif np.any(np.array(nelec) == nao): @@ -135,15 +180,17 @@ def check_nelec(nelec, nao): def get_occ(e, nelec, spin): - """Returns the occupied subset of e + """Extracts occupied orbital eigenvalues/energies. Args: - e (numpy ndarray): Energy eigenvalues. - nelec(tuple): Number of alpha and beta electrons. - spin(int): Spin. + e (numpy ndarray): Full array of orbital eigenvalues. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Number of unpaired electrons (2S, as in pyscf mol.spin). If None, assumes closed-shell. Returns: - A numpy ndarray containing the occupied eigenvalues. + numpy ndarray: Occupied eigenvalues. Shape depends on spin: + - Closed-shell (spin=None): 1D array of occupied eigenvalues + - Open-shell: 2D array (2, nocc) for alpha and beta separately """ check_nelec(nelec, e.shape[0]) if spin is None: @@ -158,15 +205,17 @@ def get_occ(e, nelec, spin): def get_dm(v, nelec, spin): - """Computes the density matrix. + """Constructs density matrix from occupied molecular orbitals. Args: - v (numpy ndarray): Eigenvectors of a previously solve Hamiltoinan. - nelec(tuple): Number of alpha and beta electrons. - spin(int): Spin. + v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Number of unpaired electrons (2S, as in pyscf mol.spin). If None, assumes closed-shell (RHF). - Return: - A numpy ndarray containing the density matrix computed using the guess Hamiltonian. + Returns: + numpy ndarray: Density matrix in AO basis.
+ - Closed-shell: 2D array (nao, nao) + - Open-shell: 3D array (2, nao, nao) for alpha and beta """ check_nelec(nelec, len(v)) @@ -183,9 +232,27 @@ def get_dm(v, nelec, spin): ############################################################################### def hcore_grad(mf): + """Returns core Hamiltonian gradient generator function. + + Args: + mf: Mean-field object with hcore_generator method. + + Returns: + callable: Function that returns core Hamiltonian gradient for a given atom. + """ return mf.hcore_generator(mf.mol) def LB_grad(mf): + """Returns Laikov-Briling Hamiltonian gradient generator function. + + Combines core Hamiltonian gradient with LB2020 model gradient. + + Args: + mf: Mean-field object with hcore_generator method. + + Returns: + callable: Function that returns total Hamiltonian gradient for a given atom. + """ hcore_grad = mf.hcore_generator(mf.mol) HLB_grad = LB20().HLB20_generator(mf.mol) def H_grad(iat): @@ -193,6 +260,17 @@ def H_grad(iat): return H_grad def get_guess_g(arg): + """Returns both guess Hamiltonian function and its gradient generator. + + Args: + arg (str): Guess method name. Available: 'core', 'lb'. + + Returns: + tuple: (hamiltonian_function, gradient_function) pair. + + Raises: + RuntimeError: If the specified guess method is not available for gradients. + """ arg = arg.lower() guesses = {'core':(hcore, hcore_grad), 'lb':(LB, LB_grad)} if arg not in guesses: @@ -200,19 +278,19 @@ def get_guess_g(arg): return guesses[arg] def eigenvalue_grad(mol, e, c, s1, h1): + """Computes nuclear gradients of orbital eigenvalues from generalized eigenvalue problem HC = eSC. - """Compute gradients of eigenvalues found from HC=eSC + Uses the Hellmann-Feynman theorem for eigenvalue derivatives. 
Args: - mol (pyscf Mole): pyscf Mole object - e (numpy 1d ndarray, mol.nao): eigenvalues - c (numpy 2d ndarray, mol.nao*mol.nao): eigenvectors - s1 (numpy 3d ndarray, 3*mol.nao*mol.nao): compact gradient of the overlap matrix [-(nabla \\|\\)] - h1 (func(int: iat)): returns the derivative of H wrt the coordinates of atom iat, i.e. dH/dr[iat] + mol (pyscf Mole): pyscf Mole object. + e (numpy ndarray): 1D array (nao,) of orbital eigenvalues. + c (numpy ndarray): 2D array (nao, nao) of MO coefficients (eigenvectors). + s1 (numpy ndarray): 3D array (3, nao, nao) - compact gradient of overlap matrix. + h1 (callable): Function returning dH/dr[iat] - Hamiltonian gradient for atom iat. Returns: - numpy 3d ndarray, mol.nao*mol.natm*3: gradient of the eigenvalues in Eh/bohr - + numpy ndarray: 3D array (nao, natm, 3) of eigenvalue gradients in Eh/bohr. """ de_dr = np.zeros((mol.nao, mol.natm, 3)) aoslices = mol.aoslice_by_atom()[:,2:] diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index 0bfff6d3..52a96d35 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -2,7 +2,22 @@ from numpy import sqrt def c_split(mol, c): -# works for an uncontracted basis only + """Splits coefficient vector by angular momentum quantum number for each atom. + + Organizes expansion coefficients into sublists grouped by angular momentum (l) + for each atomic basis function. Only supports uncontracted basis sets. + + Args: + mol (pyscf Mole): pyscf Mole object. + c (numpy ndarray): 1D array of expansion coefficients. + + Returns: + list: List of [l, coefficients] pairs where l is angular momentum and + coefficients is the subset of c for that angular momentum shell. + + Note: + Works for uncontracted basis sets only. + """ cs = [] i0 = 0 for at in mol.aoslice_by_atom(): @@ -14,6 +29,17 @@ def c_split(mol, c): return cs def rotate_c(D, cs): + """Rotates coefficient vector using Wigner D-matrices. 
+ + Applies angular momentum rotation to each angular momentum block separately. + + Args: + D (list): List of Wigner D-matrices indexed by angular momentum l. + cs (list): List of [l, coefficients] pairs from c_split(). + + Returns: + numpy ndarray: 1D array of rotated coefficients. + """ c_new = [] for l,ci in cs: ci_new = D[l] @ ci @@ -21,7 +47,19 @@ def rotate_c(D, cs): return np.hstack(c_new) def new_xy_axis(z): - # finds the "optimal" axes x' and y' from z' + """Constructs orthonormal coordinate system from a given z-axis. + + Finds optimal x' and y' axes that form a right-handed orthonormal system + with the given z' direction. The algorithm chooses x' to have maximal + component along the original axis with minimal projection onto z'. + + Args: + z (numpy ndarray): 3D vector defining the new z-axis direction. + + Returns: + numpy ndarray: 3x3 rotation matrix with rows [x', y', z'] defining the + new orthonormal coordinate system. + """ z = z/np.linalg.norm(z) # don't use /= so a copy of z is created i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z x = -z[i] * z @@ -32,9 +70,27 @@ def new_xy_axis(z): def Dmatrix(xyz, lmax, order='xyz'): - # generate Wigner D-matrices D[l][m1,m2] = D_{m1,m2}^l - # for a rotation encoded as x'=xyz[0], y'=xyz[1], z'=xyz[2] - # (m1 is rotated so D is transposed) + """Generates Wigner D-matrices for spatial rotation of spherical harmonics. + + Computes rotation matrices D^l for angular momenta l = 0 to lmax, where + D^l[m1, m2] transforms spherical harmonics under the specified rotation. + The rotation is defined by new axes x' = xyz[0], y' = xyz[1], z' = xyz[2]. + + Args: + xyz (numpy ndarray): 3x3 rotation matrix with rows defining new [x', y', z'] axes. + lmax (int): Maximum angular momentum (supports lmax <= 4). + order (str): Ordering convention for spherical harmonics. Defaults to 'xyz'. 
+ + Returns: + list: List of numpy ndarrays D[l] where D[l] is the (2l+1) x (2l+1) Wigner + D-matrix for angular momentum l. Note: m1 index is rotated (D is transposed). + + Raises: + NotImplementedError: If lmax > 4. + + Note: + The matrices are computed using explicit algebraic expressions for each l. + """ xx = xyz[0,0]; xy = xyz[0,1]; xz = xyz[0,2] yx = xyz[1,0]; yy = xyz[1,1]; yz = xyz[1,2] @@ -248,5 +304,21 @@ def Dmatrix(xyz, lmax, order='xyz'): def Dmatrix_for_z(z, lmax, order='xyz'): + """Generates Wigner D-matrices for rotation that aligns z-axis with given vector. + + Convenience function that combines new_xy_axis() and Dmatrix() to compute + rotation matrices for a rotation defined only by the target z-direction. + + Args: + z (numpy ndarray): 3D vector defining the target z-axis direction. + lmax (int): Maximum angular momentum (supports lmax <= 4). + order (str): Ordering convention for spherical harmonics. Defaults to 'xyz'. + + Returns: + list: List of Wigner D-matrices for l = 0 to lmax. + + Raises: + NotImplementedError: If lmax > 4. + """ return Dmatrix(new_xy_axis(z), lmax, order) diff --git a/qstack/spahm/rho/atom.py b/qstack/spahm/rho/atom.py index c4885897..826d3e11 100644 --- a/qstack/spahm/rho/atom.py +++ b/qstack/spahm/rho/atom.py @@ -5,6 +5,17 @@ def main(args=None): + """Command-line interface for computing SPAHM(a) atomic representations. + + Computes atom-centered SPAHM representations for a single molecule from an XYZ file. + The representation is based on fitted atomic densities from a guess Hamiltonian. + + Args: + args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. + + Returns: + None: Saves representation to numpy file specified by --name argument. 
+ """ parser = SpahmParser(description='This program computes the SPAHM(a) representation for a given molecular system', atom=True) parser.add_argument('--mol', dest='mol', required=True, type=str, help="the path to the xyz file with the molecular structure") parser.add_argument('--charge', dest='charge', default=0, type=int, help='total charge of the system (default: 0)') diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index 53856cb8..3ced2c97 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -4,6 +4,27 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): + """Fits atomic density matrices using Löwdin partitioning and density fitting. + + Decomposes the molecular density matrix into atomic contributions using Löwdin + orthogonalization, then fits each atomic density with auxiliary basis functions. + + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): 2D density matrix in AO basis. + aux_basis (str or dict): Auxiliary basis set for density fitting. + short (bool): If True, returns only diagonal blocks (atom-centered coefficients). + Defaults to False. + w_slicing (bool): If True, uses block-diagonal Coulomb matrix (faster). + Defaults to True. + only_i (list or None): List of atom indices to compute. If None, computes all atoms. + Defaults to None. + + Returns: + list or numpy ndarray: Density fitting coefficients for each atom. + - If short=False: list of 1D arrays (full aux basis per atom) + - If short=True: 1D array (concatenated atom-centered coefficients only) + """ L = lowdin.Lowdin_split(mol, dm) if only_i is not None and len(only_i) > 0: diff --git a/qstack/spahm/rho/bond.py b/qstack/spahm/rho/bond.py index 9b02b5eb..b7ea520b 100644 --- a/qstack/spahm/rho/bond.py +++ b/qstack/spahm/rho/bond.py @@ -7,6 +7,17 @@ def main(args=None): + """Command-line interface for computing SPAHM(b) bond representations. 
+ + Computes bond-centered SPAHM representations for molecules or molecular datasets. + Representations capture chemical bonding information using density fitting on bond centers. + + Args: + args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. + + Returns: + None: Saves representations to numpy files with names based on --name argument. + """ parser = SpahmParser(description='This program computes the SPAHM(b) representation for a given molecular system or a list of thereof', unified=True, bond=True) args = parser.parse_args(args=args) if args.print>0: diff --git a/qstack/spahm/rho/bond_selected.py b/qstack/spahm/rho/bond_selected.py index 929fb13d..e2ba913c 100644 --- a/qstack/spahm/rho/bond_selected.py +++ b/qstack/spahm/rho/bond_selected.py @@ -10,6 +10,29 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, readdm=None, guess=defaults.guess, xc=defaults.xc, spin=None, cutoff=defaults.cutoff, printlevel=0, omods=defaults.omod, bpath=defaults.bpath, only_m0=False, same_basis=False): + """Computes SPAHM(b) representations for specific bonds in molecules. + + Generates bond-centered representations for user-specified atom pairs across + a dataset of molecules, useful for targeted bond analysis. + + Args: + mols (list): List of pyscf Mole objects. + bondidx (numpy ndarray): 2D array (nmols, 2) of 0-indexed atom pairs defining bonds. + xyzlist (list): List of XYZ filenames corresponding to mols. + readdm (str, optional): Directory to load pre-computed density matrices. Defaults to None. + guess (str): Guess Hamiltonian method name. Defaults to defaults.guess. + xc (str): Exchange-correlation functional. Defaults to defaults.xc. + spin (numpy ndarray, optional): Array of spin multiplicities per molecule. Defaults to None. + cutoff (float): Maximum bond distance in Angstrom. Defaults to defaults.cutoff. + printlevel (int): Verbosity level. Defaults to 0. + omods (list): Open-shell modes ('alpha', 'beta'). Defaults to defaults.omod. 
+ bpath (str): Path to bond basis set directory. Defaults to defaults.bpath. + only_m0 (bool): Use only m=0 basis functions. Defaults to False. + same_basis (bool): Use generic CC.bas for all pairs. Defaults to False. + + Returns: + list: List of (filename, representation) tuples for each specified bond. + """ if spin is None or (spin == None).all(): omods = [None] @@ -36,6 +59,18 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, def main(): + """Command-line interface for computing SPAHM(b) representations for specific bonds. + + Reads a file listing XYZ structures and bond indices, computes representations + for each specified bond, and saves them to individual files. The input file format + is: XYZ_path atom1_index atom2_index (1-indexed). + + Args: + None: Parses command-line arguments. + + Returns: + None: Saves bond representations to numpy files in specified directory. + """ parser = SpahmParser(description='This program computes the SPAHM(b) representation for a list of bonds', bond=True) parser.remove_argument('elements') parser.remove_argument('only_z') diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 582ccf43..3533983c 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -13,30 +13,35 @@ def spahm_a_b(rep_type, mols, dms, elements=None, only_m0=False, zeros=False, printlevel=0, auxbasis=defaults.auxbasis, model=defaults.model, pairfile=None, dump_and_exit=False, same_basis=False, only_z=None): - """ Computes SPAHM(a,b) representations for a set of molecules. + """Computes SPAHM(a) or SPAHM(b) representations for a set of molecules. + + Core computation function that generates atom-centered or bond-centered + molecular representations using density fitting on guess Hamiltonians. 
Args: - - rep_type (str) : the representation type ('atom' or 'bond' centered) - - mols (list): the list of molecules (pyscf.Mole objects) - - dms (list of numpy.ndarray): list of guess density matrices for each molecule - - bpath (str): path to the directory containing bond-optimized basis-functions (.bas) - - cutoff (float): the cutoff distance (angstrom) between atoms to be considered as bond - - omods (list of str): the selected mode for open-shell computations - - elements (list of str): list of all elements present in the set of molecules - - only_m0 (bool): use only basis functions with `m=0` - - zeros (bool): add zeros features for non-existing bond pairs - - printlevel (int): level of verbosity - - pairfile (str): path to the pairfile (if None, atom pairs are detected automatically) - - dump_and_exit (bool): to save pairfile for the set of molecules (without generating representaitons) - - same_basis (bool): to use the same bond-optimized basis function for all atomic pairs (ZZ.bas == CC.bas for any Z) - - only_z (list of str): restrict the atomic representations to atom types in this list + rep_type (str): Representation type: 'atom' for SPAHM(a) or 'bond' for SPAHM(b). + mols (list): List of pyscf Mole objects. + dms (list): List of density matrices (2D or 3D numpy arrays) for each molecule. + bpath (str): Directory path containing bond-optimized basis files (.bas). Defaults to defaults.bpath. + cutoff (float): Bond cutoff distance in Angstrom for SPAHM(b). Defaults to defaults.cutoff. + omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. + elements (list, optional): Element symbols present in dataset. Auto-detected if None. Defaults to None. + only_m0 (bool): Use only m=0 angular momentum components. Defaults to False. + zeros (bool): Pad with zeros for non-existent bond pairs in SPAHM(b). Defaults to False. + printlevel (int): Verbosity level (0=silent, >0=verbose). Defaults to 0. 
+ auxbasis (str): Auxiliary basis set for SPAHM(a). Defaults to defaults.auxbasis. + model (str): Atomic density fitting model for SPAHM(a). Defaults to defaults.model. + pairfile (str, optional): Path to atom pair file for SPAHM(b). Auto-detected if None. Defaults to None. + dump_and_exit (bool): Save pairfile and exit without computing. Defaults to False. + same_basis (bool): Use generic CC.bas for all atom pairs. Defaults to False. + only_z (list, optional): Restrict to specific atom types. Defaults to None. Returns: - A numpy.ndarray with the atomic spahm-b representations for each molecule (Nmods,Nmolecules,NatomMax,Nfeatures). - with: - Nmods: the alpha and beta components of the representation - - Nmolecules: the number of molecules in the set - - NatomMax: the maximum number of atoms in one molecule - - Nfeatures: the number of features (for each omods) + numpy ndarray: 4D array (n_omods, n_mols, max_atoms, n_features) where: + - n_omods: Number of open-shell components (1 for closed-shell, 2 for UHF) + - n_mols: Number of molecules in dataset + - max_atoms: Maximum number of atoms/bonds across all molecules + - n_features: Representation dimension """ maxlen = 0 # This needs fixing `UnboundLocalError` if only_z is None: @@ -95,43 +100,43 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= elements=None, only_m0=False, zeros=False, split=False, printlevel=0, auxbasis=defaults.auxbasis, model=defaults.model, with_symbols=False, only_z=None, merge=True): - """ Computes and reshapes an array of SPAHM(a,b) representations + """Computes and reshapes SPAHM(a) or SPAHM(b) representations with flexible output formats. + + High-level interface that handles density matrix computation, representation generation, + and output formatting including splitting, symbol labeling, and merging options. 
Args: - - rep_type (str) : the representation type ('atom' or 'bond' centered) - - mols (list): the list of molecules (pyscf.Mole objects) - - xyzlist (list of str): list with the paths to the xyz files - - guess (str): the guess Hamiltonian - - xc (str): the exchange-correlation functionals - - dms (list of numpy.ndarray): list of guess density matrices for each molecule - - readdm (str): path to the .npy file containins density matrices - - bpath (str): path to the directory containing bond-optimized basis-functions (.bas) - - cutoff (float): the cutoff distance (angstrom) between atoms to be considered as bond - - omods (list of str): the selected mode for open-shell computations - - spin (list of int): list of spins for each molecule - - elements (list of str): list of all elements present in the set of molecules - - only_m0 (bool): use only basis functions with `m=0` - - zeros (bool): add zeros features for non-existing bond pairs - - printlevel (int): level of verbosity - - pairfile (str): path to the pairfile (if None, atom pairs are detected automatically) - - dump_and_exit (bool): to save pairfile for the set of molecules (without generating representaitons) - - same_basis (bool): to use the same bond-optimized basis function for all atomic pairs (ZZ.bas == CC.bas for any Z) - - only_z (list of str): restrict the atomic representations to atom types in this list - - split (bool): to split the final array into molecules - - with_symbols (bool): to associate atomic symbol to representations in final array - - merge (bool): to concatenate alpha and beta representations to a single feature vector + rep_type (str): Representation type ('atom' or 'bond'). + mols (list): List of pyscf Mole objects. + xyzlist (list): List of XYZ file paths corresponding to mols. + guess (str): Guess Hamiltonian method name. + xc (str): Exchange-correlation functional. Defaults to defaults.xc. + spin (list, optional): List of spin multiplicities per molecule. Defaults to None. 
+ readdm (str, optional): Directory path to load pre-computed density matrices. Defaults to None. + pairfile (str, optional): Path to atom pair file for SPAHM(b). Defaults to None. + dump_and_exit (bool): Save pairfile and exit without computing. Defaults to False. + same_basis (bool): Use generic CC.bas for all atom pairs. Defaults to True. + bpath (str): Directory with bond-optimized basis files. Defaults to defaults.bpath. + cutoff (float): Bond cutoff distance in Angstrom. Defaults to defaults.cutoff. + omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. + elements (list, optional): Element symbols in dataset. Auto-detected if None. Defaults to None. + only_m0 (bool): Use only m=0 basis functions. Defaults to False. + zeros (bool): Pad with zeros for non-existent bonds. Defaults to False. + split (bool): Split output by molecule. Defaults to False. + printlevel (int): Verbosity level. Defaults to 0. + auxbasis (str): Auxiliary basis for SPAHM(a). Defaults to defaults.auxbasis. + model (str): Atomic density model. Defaults to defaults.model. + with_symbols (bool): Include atomic symbols with representations. Defaults to False. + only_z (list, optional): Restrict to specific atom types. Defaults to None. + merge (bool): Merge alpha/beta into single vector. Defaults to True. 
Returns: - A numpy.ndarray with all representations with shape (Nmods,Nmolecules,Natoms,Nfeatures) - with: - - Nmods: the alpha and beta components of the representation - - Nmolecules: the number of molecules in the set - - Natoms: the number of atoms in one molecule - - Nfeatures: the number of features (for each omod) - reshaped according to: - - if split==False: collapses Nmolecules and returns a single np.ndarray (Nmods,Natoms,Nfeatures) (where Natoms is the total number of atoms in the set of molecules) - - if merge==True: collapses the Nmods axis into the Nfeatures axis - - if with_symbols==True: returns (for each molecule (Natoms, 2) containging the atom symbols along 1st dim and one of the above arrays + numpy ndarray: Representation array with shape depending on options: + - Base: (n_omods, n_mols, max_atoms, n_features) + - If split=False: (n_omods, total_atoms, n_features) - all molecules concatenated + - If merge=True: Features concatenated, omods dimension removed + - If with_symbols=True: Object array with (symbol, vector) tuples per atom + - If split=True and with_symbols=True: List format per molecule """ if not dump_and_exit: dms = utils.mols_guess(mols, xyzlist, guess, xc=xc, spin=spin, readdm=readdm, printlevel=printlevel) @@ -212,6 +217,17 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= return allvec def main(args=None): + """Command-line interface for computing SPAHM representations (atom or bond centered). + + Unified CLI that supports both SPAHM(a) and SPAHM(b) computations with extensive + options for molecular datasets, splitting, and output formatting. + + Args: + args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. + + Returns: + None: Saves representations to numpy files based on --name argument and options. 
+ """ parser = SpahmParser(description='This program computes the SPAHM(a,b) representations for a given molecular system or a list thereof', unified=True, atom=True, bond=True) parser.add_argument('--rep', dest='rep', type=str, choices=['atom', 'bond'], required=True, help='the type of representation') args = parser.parse_args(args=args) diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index 58e14a26..802e9d26 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -5,6 +5,22 @@ def get_basis_info(atom_types, auxbasis): + """Gathers auxiliary basis information for all atom types. + + Computes overlap matrices, basis function indices, and metric matrices + needed for atomic density fitting. + + Args: + atom_types (list): List of element symbols (e.g., ['C', 'H', 'O']). + auxbasis (str or dict): Auxiliary basis set specification. + + Returns: + tuple: (ao, ao_len, idx, M) where: + - ao (dict): Angular momentum info per element + - ao_len (dict): Basis set size per element + - idx (dict): Pair indices for symmetrization per element + - M (dict): Metric matrices per element + """ ao = {} idx = {} M = {} @@ -61,6 +77,25 @@ def df_occup(mol, dm, auxbasis): def get_model(arg): + """Returns density fitting and symmetrization functions for specified model. + + Args: + arg (str): Model name. Available options: + - 'pure': Pure density fitting + - 'sad-diff': Superposition of Atomic Densities difference + - 'occup': Occupation-corrected + - 'lowdin-short': Short Löwdin partitioning + - 'lowdin-long': Long Löwdin partitioning + - 'lowdin-short-x': Short Löwdin without slicing + - 'lowdin-long-x': Long Löwdin without slicing + - 'mr2021': Method from Margraf & Reuter 2021 + + Returns: + tuple: (density_fitting_function, symmetrization_function) pair. + + Raises: + RuntimeError: If model name is not recognized. + """ arg = arg.lower() if arg not in models_dict: raise RuntimeError(f'Unknown model. 
Available models: {list(models_dict.keys())}') @@ -68,6 +103,23 @@ def get_model(arg): def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): + """Symmetrizes density fitting coefficients using MR2021 method. + + Implementation of the method from J. T. Margraf and K. Reuter, + Nat. Commun. 12, 344 (2021). + + Args: + c (numpy ndarray): Concatenated density fitting coefficients. + mol (pyscf Mole): pyscf Mole object. + idx (dict): Pair indices per element. + ao (dict): Angular momentum info per element. + ao_len (dict): Basis set sizes per element. + _M: Unused (for interface compatibility). + _: Unused (for interface compatibility). + + Returns: + list: Symmetrized vectors for each atom. + """ # J. T. Margraf and K. Reuter, Nat. Commun. 12, 344 (2021). v = [] i0 = 0 @@ -79,6 +131,22 @@ def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): + """Symmetrizes coefficients for short Löwdin and related models. + + Applies metric matrix transformation and pads to consistent length. + + Args: + c (numpy ndarray): Density fitting coefficients. + mol (pyscf Mole): pyscf Mole object. + idx (dict): Pair indices per element. + ao (dict): Angular momentum info per element. + ao_len (dict): Basis set sizes per element. + M (dict): Metric matrices per element. + _: Unused (for interface compatibility). + + Returns: + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. + """ # short lowdin / everything else v = [] i0 = 0 @@ -92,6 +160,22 @@ def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): def coefficients_symmetrize_long(c_df, mol, idx, ao, ao_len, M, atom_types): + """Symmetrizes coefficients for long Löwdin partitioning. + + Handles per-atom coefficient lists from Löwdin splitting. + + Args: + c_df (list): List of coefficient arrays per atom. + mol (pyscf Mole): pyscf Mole object. + idx (dict): Pair indices per element. 
+ ao (dict): Angular momentum info per element. + ao_len (dict): Basis set sizes per element. + M (dict): Metric matrices per element. + atom_types (list): All element types in dataset. + + Returns: + list: Symmetrized vectors for each atom. + """ # long lowdin vectors = [] for c_a in c_df: diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index 33e17c77..1ec74104 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -8,10 +8,34 @@ def make_bname(q0, q1): + """Creates canonical bond name from two element symbols. + + Orders elements alphabetically to ensure consistent naming (e.g., 'CH' not 'HC'). + + Args: + q0 (str): First element symbol. + q1 (str): Second element symbol. + + Returns: + str: Concatenated element symbols in alphabetical order (e.g., 'CH', 'CC', 'NO'). + """ return operator.concat(*sorted((q0, q1))) def get_basis_info(qqs, mybasis, only_m0, printlevel): + """Computes basis indices and metric matrices for bond pairs. + + Args: + qqs (list): List of bond pair names (e.g., ['CC', 'CH', 'OH']). + mybasis (dict): Dictionary mapping bond names to basis set dictionaries. + only_m0 (bool): If True, use only m=0 angular momentum components. + printlevel (int): Verbosity level. + + Returns: + tuple: (idx, M) where: + - idx (dict): Pair indices for each bond type + - M (dict): Metric matrices for each bond type + """ idx = {} M = {} for qq in qqs: @@ -27,6 +51,16 @@ def get_basis_info(qqs, mybasis, only_m0, printlevel): def read_df_basis(bnames, bpath, same_basis=False): + """Loads bond-optimized basis sets from .bas files. + + Args: + bnames (list): List of bond pair names (e.g., ['CC', 'CH']). + bpath (str): Directory path containing .bas files. + same_basis (bool): If True, uses generic CC.bas for all pairs. Defaults to False. + + Returns: + dict: Dictionary mapping bond names to basis set dictionaries. 
+ """ mybasis = {} for bname in bnames: if bname in mybasis: @@ -38,6 +72,18 @@ def read_df_basis(bnames, bpath, same_basis=False): def get_element_pairs(elements): + """Generates all possible element pair combinations. + + Creates complete list of bond types assuming all elements can bond with each other. + + Args: + elements (list): List of element symbols. + + Returns: + tuple: (qqs, qqs4q) where: + - qqs (list): Sorted list of unique bond pair names + - qqs4q (dict): Maps each element to its list of possible bond partners + """ qqs = [] qqs4q = {} for q1 in elements: @@ -52,6 +98,23 @@ def get_element_pairs(elements): def get_element_pairs_cutoff(elements, mols, cutoff, align=False): + """Determines element pairs based on actual distances in molecules. + + Identifies which element pairs actually form bonds within the distance cutoff + by scanning molecular geometries. + + Args: + elements (list): List of element symbols to consider. + mols (list): List of pyscf Mole objects. + cutoff (float): Maximum bond distance in Angstrom. + align (bool): If True, includes all element pairs regardless of distance. + Defaults to False. + + Returns: + tuple: (qqs, qqs4q) where: + - qqs (list): Sorted list of bond pair names found within cutoff + - qqs4q (dict): Maps each element to its list of bond partners + """ qqs4q = {q: [] for q in elements} qqs = [] if align: @@ -142,6 +205,28 @@ def vec_from_cs(z, cs, lmax, idx): def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): + """Computes bond representation for a specific atom pair. + + Extracts bond density, fits it with basis functions at the bond center, + and rotates coefficients to bond axis to create rotationally invariant representation. + + Args: + i0 (int): Index of first atom. + i1 (int): Index of second atom. + L (Lowdin_split): Löwdin-split density matrix object. + mybasis (dict): Bond basis sets keyed by bond names. + idx (dict): Pair indices for symmetrization. + q (list): Element symbols for all atoms. 
+ r (numpy ndarray): Atomic coordinates in Angstrom. + cutoff (float): Maximum bond distance. + + Returns: + tuple: ([v0, v1], bname) where: + - v0: Representation from atom i0's perspective + - v1: Representation from atom i1's perspective + - bname: Bond name (e.g., 'CH') + Returns (None, None) if distance exceeds cutoff. + """ q0, q1 = q[i0], q[i1] r0, r1 = r[i0], r[i1] z = r1-r0 diff --git a/qstack/spahm/rho/lowdin.py b/qstack/spahm/rho/lowdin.py index 8e9b1800..6463d11e 100644 --- a/qstack/spahm/rho/lowdin.py +++ b/qstack/spahm/rho/lowdin.py @@ -1,8 +1,27 @@ import numpy as np class Lowdin_split: + """Löwdin orthogonalization for density matrix partitioning. + + Transforms density matrix to orthogonal basis using symmetric orthogonalization, + enabling clean atomic and bond partitioning of electron density. + + Attributes: + S (numpy ndarray): Overlap matrix in AO basis. + S12 (numpy ndarray): Square root of overlap matrix (S^{1/2}). + S12i (numpy ndarray): Inverse square root of overlap matrix (S^{-1/2}). + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): Original density matrix in AO basis. + dmL (numpy ndarray): Löwdin-orthogonalized density matrix. + """ def __init__(self, mol, dm): + """Initializes Löwdin split with molecule and density matrix. + + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): Density matrix in AO basis. + """ S = mol.intor_symmetric('int1e_ovlp') S12,S12i = self.sqrtm(S) self.S = S @@ -13,6 +32,14 @@ def __init__(self, mol, dm): self.dmL = S12 @ dm @ S12 def sqrtm(self, m): + """Computes matrix square root and inverse square root via eigendecomposition. + + Args: + m (numpy ndarray): Symmetric positive-definite matrix. + + Returns: + tuple: (m^{1/2}, m^{-1/2}) both symmetrized. 
+ """ e,b = np.linalg.eigh(m) e = np.sqrt(e) sm = b @ np.diag(e ) @ b.T @@ -20,6 +47,18 @@ def sqrtm(self, m): return (sm+sm.T)*0.5, (sm1+sm1.T)*0.5 def get_bond(self, at1idx, at2idx): + """Extracts bond density matrix for an atom pair. + + Isolates the density matrix components corresponding to interactions + between two atoms, transforming back to AO basis. + + Args: + at1idx (int): Index of first atom. + at2idx (int): Index of second atom. + + Returns: + numpy ndarray: Bond density matrix in AO basis (2D array). + """ mo1idx = range(*self.mol.aoslice_nr_by_atom()[at1idx][2:]) mo2idx = range(*self.mol.aoslice_nr_by_atom()[at2idx][2:]) ix1 = np.ix_(mo1idx,mo2idx) diff --git a/qstack/spahm/rho/parser.py b/qstack/spahm/rho/parser.py index 4a5f740d..12e5c520 100644 --- a/qstack/spahm/rho/parser.py +++ b/qstack/spahm/rho/parser.py @@ -5,6 +5,17 @@ class SpahmParser(argparse.ArgumentParser): + """Custom argument parser for SPAHM command-line tools. + + Provides pre-configured argument sets for atomic and bond SPAHM computations + with consistent interface across different entry points. + + Args: + unified (bool): Enable unified file/list interface. Defaults to False. + atom (bool): Add atom-specific arguments (auxbasis, model). Defaults to False. + bond (bool): Add bond-specific arguments (cutoff, bpath, etc.). Defaults to False. + **kwargs: Additional arguments passed to ArgumentParser. + """ def __init__(self, unified=False, atom=False, bond=False, **kwargs): super().__init__(formatter_class=argparse.ArgumentDefaultsHelpFormatter, **kwargs) parser = self @@ -40,6 +51,13 @@ def __init__(self, unified=False, atom=False, bond=False, **kwargs): def remove_argument(parser, arg): + """Removes an argument from the parser. + + Utility method for customizing parsers by removing unwanted arguments. + + Args: + arg (str): Argument name (with or without dashes) or destination name. 
+ """ for action in parser._actions: opts = action.option_strings if (opts and opts[0] == arg) or action.dest == arg: diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index a413bb74..7c570483 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -4,6 +4,18 @@ def idxl0(i, l, ao): + """Returns index of basis function with same L and N quantum numbers but M=0. + + Finds the m=0 component of the same angular momentum shell for normalization. + + Args: + i (int): Basis function index. + l (int): Angular momentum quantum number. + ao (dict): Angular momentum info dict with 'l' and 'm' keys. + + Returns: + int: Index of corresponding m=0 basis function. + """ # return the index of the basis function with the same L and N but M=0 if l != 1: return i - ao['m'][i]+l @@ -11,6 +23,20 @@ def idxl0(i, l, ao): return i + [0, 2, 1][ao['m'][i]] def get_S(q, basis): + """Computes overlap matrix and angular momentum info for an atom. + + Creates single-atom molecule and extracts basis function structure. + + Args: + q (str): Element symbol. + basis (str or dict): Basis set specification. + + Returns: + tuple: (S, ao, ao_start) where: + - S (numpy ndarray): Overlap matrix + - ao (dict): Angular momentum info with 'l' and 'm' lists + - ao_start (list): Starting indices for each angular momentum shell + """ mol = compound.make_atom(q, basis) S = mol.intor_symmetric('int1e_ovlp') @@ -29,6 +55,16 @@ def get_S(q, basis): def store_pair_indices(ao): + """Stores basis function pair indices with matching L and M quantum numbers. + + Creates list of all (i,j) pairs where basis functions have identical angular momenta. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + + Returns: + list: List of [i, j] index pairs with matching (l, m). 
+ """ idx = [] for i, [li, mi] in enumerate(zip(ao['l'], ao['m'], strict=True)): for j, [lj, mj] in enumerate(zip(ao['l'], ao['m'], strict=True)): @@ -68,6 +104,16 @@ def metric_matrix(q, idx, ao, S): def metric_matrix_short(idx, ao, S): + """Computes metric matrix for symmetrization of short-format coefficients. + + Args: + idx (list): List of basis function pair indices. + ao (dict): Angular momentum info. + S (numpy ndarray): Overlap matrix. + + Returns: + numpy ndarray: Square root of metric matrix for normalization. + """ N = len(idx) A = np.zeros((N,N)) for p in range(N): @@ -83,6 +129,17 @@ def metric_matrix_short(idx, ao, S): def vectorize_c(idx, c): + """Vectorizes density fitting coefficients by forming products. + + Creates rotationally invariant representation from coefficient products. + + Args: + idx (list): List of [i, j] basis function pair indices. + c (numpy ndarray): 1D array of coefficients. + + Returns: + numpy ndarray: 1D array of coefficient products c[i]*c[j]. + """ v = np.zeros(len(idx)) for p, (i,j) in enumerate(idx): v[p] = c[i]*c[j] diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index 39f63075..4ec72d65 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -21,6 +21,20 @@ def get_chsp(fname, n): + """Loads charge and spin information from file. + + Reads a file containing charge/spin values, converting 'None' strings to None objects. + + Args: + fname (str or None): Path to charge/spin file. If None, returns array of Nones. + n (int): Expected number of entries in the file. + + Returns: + numpy ndarray: Array of charge/spin values (int or None) of length n. + + Raises: + RuntimeError: If file is not found or has wrong length. 
+ """ def chsp_converter(chsp): if chsp == 'None': chsp = None @@ -39,6 +53,22 @@ def chsp_converter(chsp): def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, progress=False, srcdir=None): + """Loads molecules from XYZ files and creates pyscf Mole objects. + + Args: + xyzlist (list): List of XYZ filenames. + charge (list or None): List of molecular charges (or None for neutral). + spin (list or None): List of spin multiplicities (or None for default). + basis (str or dict): Basis set specification. + printlevel (int): Verbosity level (0=silent). Defaults to 0. + units (str): Coordinate units ('ANG' or 'BOHR'). Defaults to 'ANG'. + ecp (str or dict, optional): Effective core potential specification. Defaults to None. + progress (bool): If True, shows progress bar. Defaults to False. + srcdir (str, optional): Source directory prepended to XYZ filenames. Defaults to None. + + Returns: + list: List of pyscf Mole objects. + """ mols = [] if spin is None: spin = [None] * len(xyzlist) @@ -59,6 +89,20 @@ def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, printlevel=0): + """Computes or loads guess density matrices for a list of molecules. + + Args: + mols (list): List of pyscf Mole objects. + xyzlist (list): List of XYZ filenames (for naming/loading). + guess (str or callable): Guess method name or function. + xc (str): Exchange-correlation functional for guess. Defaults to defaults.xc. + spin (list or None): List of spin multiplicities. Defaults to None. + readdm (str, optional): Directory path to load pre-computed density matrices. Defaults to None. + printlevel (int): Verbosity level. Defaults to 0. + + Returns: + list: List of density matrices (2D or 3D numpy arrays). 
+ """ dms = [] guess = guesses.get_guess(guess) if spin is None: @@ -80,6 +124,18 @@ def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, pri def dm_open_mod(dm, omod): + """Applies open-shell modification to density matrix. + + Args: + dm (numpy ndarray): Density matrix (2D for closed-shell, 3D for open-shell). + omod (str or None): Open-shell modification type. Options in omod_fns_dict. + + Returns: + numpy ndarray: Modified density matrix. + + Raises: + ValueError: If omod is not a valid modification type. + """ omod_fns_dict[None] = lambda dm: dm if omod in omod_fns_dict: return omod_fns_dict[omod](dm) diff --git a/qstack/tools.py b/qstack/tools.py index 50d44456..366748bb 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -8,10 +8,10 @@ def _orca2gpr_idx(mol): """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. Args: - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. Returns: - A numpy ndarray of re-arranged indices. + numpy.ndarray: Re-arranged indices array. """ #def _M1(n): # return (n+1)//2 if n%2 else -((n+1)//2) @@ -38,10 +38,10 @@ def _orca2gpr_sign(mol): """Given a molecule returns a list of multipliers needed to tranform from orca AO. Args: - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. Returns: - A numpy ndarray of +1/-1 multipliers + numpy.ndarray: Array of +1/-1 multipliers. """ signs = np.ones(mol.nao, dtype=int) i=0 @@ -64,10 +64,10 @@ def _pyscf2gpr_idx(mol): """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. Args: - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. Returns: - A numpy ndarray of re-arranged indices. + numpy.ndarray: Re-arranged indices array. 
""" idx = np.arange(mol.nao, dtype=int) @@ -89,16 +89,22 @@ def _pyscf2gpr_idx(mol): def reorder_ao(mol, vector, src='pyscf', dest='gpr'): """Reorder the atomic orbitals from one convention to another. - For example, src=pyscf dest=gpr reorders p-orbitals from +1,-1,0 (pyscf convention) to -1,0,+1 (SA-GPR convention). + + For example, src=pyscf dest=gpr reorders p-orbitals from +1,-1,0 (pyscf convention) + to -1,0,+1 (SA-GPR convention). Args: - mol (pyscf Mole): pyscf Mole object. - vector (numpy ndarray): vector or matrix - src (string): current convention - dest (string): convention to convert to (available: 'pyscf', 'gpr', ... + mol (pyscf.gto.Mole): pyscf Mole object. + vector (numpy.ndarray): Vector or matrix to reorder. + src (str): Current convention. Defaults to 'pyscf'. + dest (str): Convention to convert to (available: 'pyscf', 'gpr', 'orca'). Defaults to 'gpr'. Returns: - A numpy ndarray with the reordered vector or matrix. + numpy.ndarray: Reordered vector or matrix. + + Raises: + NotImplementedError: If the specified convention is not implemented. + ValueError: If vector dimension is not 1 or 2. """ def get_idx(mol, convention): @@ -145,10 +151,10 @@ def _Rz(a): """Computes the rotation matrix around absolute z-axis. Args: - a (float): Rotation angle. + a (float): Rotation angle in radians. Returns: - A 2D numpy ndarray containing the rotation matrix. + numpy.ndarray: 3x3 rotation matrix. """ A = np.zeros((3,3)) @@ -168,10 +174,10 @@ def _Ry(b): """Computes the rotation matrix around absolute y-axis. Args: - b (float): Rotation angle. + b (float): Rotation angle in radians. Returns: - A 2D numpy ndarray containing the rotation matrix. + numpy.ndarray: 3x3 rotation matrix. """ A = np.zeros((3,3)) @@ -190,10 +196,10 @@ def _Rx(g): """Computes the rotation matrix around absolute x-axis. Args: - g (float): Rotation angle. + g (float): Rotation angle in radians. Returns: - A 2D numpy ndarray containing the rotation matrix. 
+ numpy.ndarray: 3x3 rotation matrix. """ A = np.zeros((3,3)) @@ -216,10 +222,10 @@ def rotate_euler(a, b, g, rad=False): a (float): Alpha Euler angle. b (float): Beta Euler angle. g (float): Gamma Euler angle. - rad (bool) : Wheter the angles are in radians or not. + rad (bool): Whether the angles are in radians. Defaults to False (degrees). Returns: - A 2D numpy ndarray with the rotation matrix. + numpy.ndarray: 3x3 rotation matrix. """ if not rad: @@ -235,31 +241,56 @@ def rotate_euler(a, b, g, rad=False): def unix_time_decorator(func): -# thanks to https://gist.github.com/turicas/5278558 - def wrapper(*args, **kwargs): - start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) - ret = func(*args, **kwargs) - end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() - real = end_time - start_time - user = end_resources.ru_utime - start_resources.ru_utime - syst = end_resources.ru_stime - start_resources.ru_stime - print(f'{func.__name__} : real: {real:.4f} user: {user:.4f} sys: {syst:.4f}') - return ret - return wrapper + """Decorator to measure and print execution time statistics for a function. + + Measures real, user, and system time for the decorated function. + Thanks to https://gist.github.com/turicas/5278558 + + Args: + func (callable): Function to be decorated. + + Returns: + callable: Wrapped function that prints timing information. 
+ """ + def wrapper(*args, **kwargs): + start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) + ret = func(*args, **kwargs) + end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() + real = end_time - start_time + user = end_resources.ru_utime - start_resources.ru_utime + syst = end_resources.ru_stime - start_resources.ru_stime + print(f'{func.__name__} : real: {real:.4f} user: {user:.4f} sys: {syst:.4f}') + return ret + return wrapper def unix_time_decorator_with_tvalues(func): -# thanks to https://gist.github.com/turicas/5278558 - def wrapper(*args, **kwargs): - start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) - ret = func(*args, **kwargs) - end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() - timing = {'real' : end_time - start_time, - 'user' : end_resources.ru_utime - start_resources.ru_utime, - 'sys' : end_resources.ru_stime - start_resources.ru_stime} - return timing, ret - return wrapper + """Decorator to measure execution time statistics and return them along with function result. + + Measures real, user, and system time for the decorated function and returns timing dict. + Thanks to https://gist.github.com/turicas/5278558 + + Args: + func (callable): Function to be decorated. + + Returns: + callable: Wrapped function that returns (timing_dict, result). + """ + def wrapper(*args, **kwargs): + start_time, start_resources = time.time(), resource.getrusage(resource.RUSAGE_SELF) + ret = func(*args, **kwargs) + end_resources, end_time = resource.getrusage(resource.RUSAGE_SELF), time.time() + timing = {'real' : end_time - start_time, + 'user' : end_resources.ru_utime - start_resources.ru_utime, + 'sys' : end_resources.ru_stime - start_resources.ru_stime} + return timing, ret + return wrapper def correct_num_threads(): + """Set MKL and OpenBLAS thread counts based on SLURM environment. 
+ + If running under SLURM, sets MKL_NUM_THREADS and OPENBLAS_NUM_THREADS + to match SLURM_CPUS_PER_TASK. + """ if "SLURM_CPUS_PER_TASK" in os.environ: os.environ["MKL_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] os.environ["OPENBLAS_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] diff --git a/qstack/tree.dat b/qstack/tree.dat deleted file mode 100644 index 368ef60c..00000000 --- a/qstack/tree.dat +++ /dev/null @@ -1,55 +0,0 @@ -├── basis_opt -│   ├── basis_tools.py -│   ├── opt.py -├── compound.py -├── constants.py -├── equio.py -├── fields -│   ├── decomposition.py -│   ├── density2file.py -│   ├── dm.py -│   ├── dori.py -│   ├── excited.py -│   ├── hf_otpd.py -│   ├── hirshfeld.py -│   ├── moments.py -├── mathutils -│   ├── fps.py -│   ├── matrix.py -│   ├── wigner.py -│   ├── xyz_integrals_float.py -│   └── xyz_integrals_sym.py -├── orcaio.py -├── qml.py -├── qstack-qml -│   ├── qstack_qml -│   │   ├── b2r2.py -│   │   └── slatm.py -├── regression -│   ├── condition.py -│   ├── cross_validate_results.py -│   ├── final_error.py -│   ├── global_kernels.py -│   ├── hyperparameters.py -│   ├── kernel.py -│   ├── kernel_utils.py -│   ├── local_kernels.py -│   ├── oos.py -│   └── regression.py -├── spahm -│   ├── compute_spahm.py -│   ├── guesses.py -│   ├── LB2020guess.py -│   └── rho -│   ├── atomic_density.py -│   ├── atom.py -│   ├── bond.py -│   ├── bond_selected.py -│   ├── Dmatrix.py -│   ├── dmb_rep_atom.py -│   ├── dmb_rep_bond.py -│   ├── lowdin.py -│   ├── rep_completion.py -│   ├── sym.py -│   └── utils.py -└── tools.py From 36596da1f1eb5e00e4cf7676a0367f8e16671685 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Fri, 31 Oct 2025 18:24:56 +0100 Subject: [PATCH 04/23] Reorganize tools.py --- qstack/compound.py | 5 +- qstack/mathutils/rotation_matrix.py | 95 ++++++++++ qstack/orcaio.py | 4 +- qstack/regression/parser.py | 36 +--- qstack/reorder.py | 145 +++++++++++++++ qstack/spahm/rho/parser.py | 25 +-- qstack/tools.py | 274 ++++------------------------ 
tests/test_orca.py | 2 +- tests/test_reorder.py | 22 +-- 9 files changed, 299 insertions(+), 309 deletions(-) create mode 100644 qstack/mathutils/rotation_matrix.py create mode 100644 qstack/reorder.py diff --git a/qstack/compound.py b/qstack/compound.py index f99bafb7..f79c073e 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -8,7 +8,8 @@ import numpy as np from pyscf import gto, data from qstack import constants -from qstack.tools import rotate_euler +from qstack.mathutils.rotation_matrix import rotate_euler + # detects a charge-spin line, containing only two ints (one positive or negative, the other positive and nonzero) @@ -337,7 +338,7 @@ def make_atom(q, basis): def singleatom_basis_enumerator(basis): """Enumerates the different tensors of atomic orbitals within a 1-atom basis set. - + Each tensor is a 2l+1-sized group of orbitals that share a radial function and l value. Args: diff --git a/qstack/mathutils/rotation_matrix.py b/qstack/mathutils/rotation_matrix.py new file mode 100644 index 00000000..eaaf9ea9 --- /dev/null +++ b/qstack/mathutils/rotation_matrix.py @@ -0,0 +1,95 @@ +import numpy as np + + +def _Rz(a): + """Computes the rotation matrix around absolute z-axis. + + Args: + a (float): Rotation angle in radians. + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + + A = np.zeros((3,3)) + A[0,0] = np.cos(a) + A[0,1] = -np.sin(a) + A[0,2] = 0 + A[1,0] = np.sin(a) + A[1,1] = np.cos(a) + A[1,2] = 0 + A[2,0] = 0 + A[2,1] = 0 + A[2,2] = 1 + return A + + +def _Ry(b): + """Computes the rotation matrix around absolute y-axis. + + Args: + b (float): Rotation angle in radians. + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + + A = np.zeros((3,3)) + A[0,0] = np.cos(b) + A[0,1] = 0 + A[0,2] = np.sin(b) + A[1,0] = 0 + A[1,1] = 1 + A[1,2] = 0 + A[2,0] = -np.sin(b) + A[2,1] = 0 + A[2,2] = np.cos(b) + return A + + +def _Rx(g): + """Computes the rotation matrix around absolute x-axis. + + Args: + g (float): Rotation angle in radians. 
+ + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + + A = np.zeros((3,3)) + A[0,0] = 1 + A[0,1] = 0 + A[0,2] = 0 + A[1,0] = 0 + A[1,1] = np.cos(g) + A[1,2] = -np.sin(g) + A[2,0] = 0 + A[2,1] = np.sin(g) + A[2,2] = np.cos(g) + return A + + +def rotate_euler(a, b, g, rad=False): + """Computes the rotation matrix given Euler angles. + + Args: + a (float): Alpha Euler angle. + b (float): Beta Euler angle. + g (float): Gamma Euler angle. + rad (bool): Whether the angles are in radians. Defaults to False (degrees). + + Returns: + numpy.ndarray: 3x3 rotation matrix. + """ + + if not rad: + a = a * np.pi / 180 + b = b * np.pi / 180 + g = g * np.pi / 180 + + A = _Rz(a) + B = _Ry(b) + G = _Rx(g) + + return A@B@G diff --git a/qstack/orcaio.py b/qstack/orcaio.py index ded39064..2ea2de9b 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -3,7 +3,7 @@ import numpy as np import pyscf from qstack.mathutils.matrix import from_tril -from qstack.tools import reorder_ao +from qstack.reorder import reorder_ao def read_input(fname, basis, ecp=None): @@ -59,7 +59,7 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re reorder_dest (str): Which AO ordering convention to use. Defaults to 'pyscf'. Returns: - numpy.ndarray: 2D array containing density matrix (openshell=False) or + numpy.ndarray: 2D array containing density matrix (openshell=False) or 3D array containing density and spin density matrices (openshell=True). Raises: diff --git a/qstack/regression/parser.py b/qstack/regression/parser.py index 9fdc5cb5..a3e63814 100644 --- a/qstack/regression/parser.py +++ b/qstack/regression/parser.py @@ -1,7 +1,9 @@ import argparse +from qstack.tools import FlexParser from .kernel_utils import defaults, ParseKwargs, local_kernels_dict, global_kernels_dict -class RegressionParser(argparse.ArgumentParser): + +class RegressionParser(FlexParser): """Custom argument parser for kernel ridge regression tasks. 
Provides pre-configured argument sets for machine learning workflows with @@ -74,35 +76,3 @@ def __init__(self, hyperparameters_set=None, **kwargs): parser.add_argument('--print', type=int, dest='printlevel', default=0, help='printlevel') parser.add_argument('--ada', action='store_true', dest='adaptive', default=False, help='if adapt sigma') parser.add_argument('--name', type=str, dest='nameout', default=None, help='the name of the output file') - - - def remove_argument(parser, arg): - """Removes an argument from the parser. - - Utility method for customizing parsers by removing unwanted arguments - from the pre-configured set. Useful when deriving specialized parsers. - - Args: - arg (str): Argument name (with or without dashes, e.g., '--x' or 'x') - or destination name (e.g., 'repr'). - - Returns: - None: Modifies parser in place. - - Example: - >>> parser = RegressionParser(hyperparameters_set='single') - >>> parser.remove_argument('--sparse') - >>> # sparse argument is now removed - """ - for action in parser._actions: - opts = action.option_strings - if (opts and opts[0] == arg) or action.dest == arg: - parser._remove_action(action) - break - - for action in parser._action_groups: - for group_action in action._group_actions: - opts = group_action.option_strings - if (opts and opts[0] == arg) or group_action.dest == arg: - action._group_actions.remove(group_action) - return diff --git a/qstack/reorder.py b/qstack/reorder.py new file mode 100644 index 00000000..c052166e --- /dev/null +++ b/qstack/reorder.py @@ -0,0 +1,145 @@ +import numpy as np + + +def _orca2gpr_idx(mol): + """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + + Returns: + numpy.ndarray: Re-arranged indices array. 
+ """ + #def _M1(n): + # return (n+1)//2 if n%2 else -((n+1)//2) + idx = np.arange(mol.nao, dtype=int) + i=0 + for iat in range(mol.natm): + q = mol._atom[iat][0] + for gto in mol._basis[q]: + l = gto[0] + msize = 2*l+1 + nf = max([len(prim)-1 for prim in gto[1:]]) + for _n in range(nf): + #for m in range(-l, l+1): + # m1 = _M1(m+l) + # idx[(i+(m1-m))] = i + # i+=1 + I = np.s_[i:i+msize] + idx[I] = np.concatenate((idx[I][::-2], idx[I][1::2])) + i += msize + return idx + + +def _orca2gpr_sign(mol): + """Given a molecule returns a list of multipliers needed to tranform from orca AO. + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + + Returns: + numpy.ndarray: Array of +1/-1 multipliers. + """ + signs = np.ones(mol.nao, dtype=int) + i=0 + for iat in range(mol.natm): + q = mol._atom[iat][0] + for gto in mol._basis[q]: + l = gto[0] + msize = 2*l+1 + nf = max([len(prim)-1 for prim in gto[1:]]) + if l<3: + i += msize*nf + else: + for _n in range(nf): + signs[i+5:i+msize] = -1 # |m| >= 3 + i+= msize + return signs + + +def _pyscf2gpr_idx(mol): + """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + + Returns: + numpy.ndarray: Re-arranged indices array. + """ + + idx = np.arange(mol.nao, dtype=int) + i=0 + for iat in range(mol.natm): + q = mol._atom[iat][0] + for gto in mol._basis[q]: + l = gto[0] + msize = 2*l+1 + nf = max([len(prim)-1 for prim in gto[1:]]) + if l==1: + for _n in range(nf): + idx[i:i+3] = [i+1,i+2,i] + i += 3 + else: + i += msize * nf + return idx + + +def reorder_ao(mol, vector, src='pyscf', dest='gpr'): + """Reorder the atomic orbitals from one convention to another. + + For example, src=pyscf dest=gpr reorders p-orbitals from +1,-1,0 (pyscf convention) + to -1,0,+1 (SA-GPR convention). + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + vector (numpy.ndarray): Vector or matrix to reorder. + src (str): Current convention. Defaults to 'pyscf'. 
+ dest (str): Convention to convert to (available: 'pyscf', 'gpr', 'orca'). Defaults to 'gpr'. + + Returns: + numpy.ndarray: Reordered vector or matrix. + + Raises: + NotImplementedError: If the specified convention is not implemented. + ValueError: If vector dimension is not 1 or 2. + """ + + def get_idx(mol, convention): + convention = convention.lower() + if convention == 'gpr': + return np.arange(mol.nao) + elif convention == 'pyscf': + return _pyscf2gpr_idx(mol) + elif convention == 'orca': + return _orca2gpr_idx(mol) + else: + errstr = f'Conversion to/from the {convention} convention is not implemented' + raise NotImplementedError(errstr) + + def get_sign(mol, convention): + convention = convention.lower() + if convention in ['gpr', 'pyscf']: + return np.ones(mol.nao, dtype=int) + elif convention == 'orca': + return _orca2gpr_sign(mol) + + idx_src = get_idx(mol, src) + idx_dest = get_idx(mol, dest) + sign_src = get_sign(mol, src) + sign_dest = get_sign(mol, dest) + + if vector.ndim == 2: + sign_src = np.einsum('i,j->ij', sign_src, sign_src) + sign_dest = np.einsum('i,j->ij', sign_dest, sign_dest) + idx_dest = np.ix_(idx_dest,idx_dest) + idx_src = np.ix_(idx_src,idx_src) + elif vector.ndim!=1: + errstr = f'Dim = {vector.ndim} (should be 1 or 2)' + raise ValueError(errstr) + + newvector = np.zeros_like(vector) + newvector[idx_dest] = (sign_src*vector)[idx_src] + newvector *= sign_dest + + return newvector + diff --git a/qstack/spahm/rho/parser.py b/qstack/spahm/rho/parser.py index 12e5c520..367ecd28 100644 --- a/qstack/spahm/rho/parser.py +++ b/qstack/spahm/rho/parser.py @@ -1,10 +1,11 @@ import argparse +from qstack.tools import FlexParser from .utils import defaults, omod_fns_dict from .dmb_rep_atom import models_dict from ..guesses import guesses_dict -class SpahmParser(argparse.ArgumentParser): +class SpahmParser(FlexParser): """Custom argument parser for SPAHM command-line tools. 
Provides pre-configured argument sets for atomic and bond SPAHM computations @@ -48,25 +49,3 @@ def __init__(self, unified=False, atom=False, bond=False, **kwargs): parser.add_argument('--pairfile', dest='pairfile', default=None, type=str, help='path to the atom pair file') parser.add_argument('--dump_and_exit', dest='dump_and_exit', action='store_true', help='write the atom pair file and exit if --pairfile is set') parser.add_argument('--same_basis', dest='same_basis', action='store_true', help='if to use generic CC.bas basis file for all atom pairs (Default: uses pair-specific basis, if exists)') - - - def remove_argument(parser, arg): - """Removes an argument from the parser. - - Utility method for customizing parsers by removing unwanted arguments. - - Args: - arg (str): Argument name (with or without dashes) or destination name. - """ - for action in parser._actions: - opts = action.option_strings - if (opts and opts[0] == arg) or action.dest == arg: - parser._remove_action(action) - break - - for action in parser._action_groups: - for group_action in action._group_actions: - opts = group_action.option_strings - if (opts and opts[0] == arg) or group_action.dest == arg: - action._group_actions.remove(group_action) - return diff --git a/qstack/tools.py b/qstack/tools.py index 366748bb..cc7972ae 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -1,243 +1,7 @@ import os import time import resource -import numpy as np - - -def _orca2gpr_idx(mol): - """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. - - Args: - mol (pyscf.gto.Mole): pyscf Mole object. - - Returns: - numpy.ndarray: Re-arranged indices array. 
- """ - #def _M1(n): - # return (n+1)//2 if n%2 else -((n+1)//2) - idx = np.arange(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - for _n in range(nf): - #for m in range(-l, l+1): - # m1 = _M1(m+l) - # idx[(i+(m1-m))] = i - # i+=1 - I = np.s_[i:i+msize] - idx[I] = np.concatenate((idx[I][::-2], idx[I][1::2])) - i += msize - return idx - - -def _orca2gpr_sign(mol): - """Given a molecule returns a list of multipliers needed to tranform from orca AO. - - Args: - mol (pyscf.gto.Mole): pyscf Mole object. - - Returns: - numpy.ndarray: Array of +1/-1 multipliers. - """ - signs = np.ones(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - if l<3: - i += msize*nf - else: - for _n in range(nf): - signs[i+5:i+msize] = -1 # |m| >= 3 - i+= msize - return signs - - -def _pyscf2gpr_idx(mol): - """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. - - Args: - mol (pyscf.gto.Mole): pyscf Mole object. - - Returns: - numpy.ndarray: Re-arranged indices array. - """ - - idx = np.arange(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - if l==1: - for _n in range(nf): - idx[i:i+3] = [i+1,i+2,i] - i += 3 - else: - i += msize * nf - return idx - - -def reorder_ao(mol, vector, src='pyscf', dest='gpr'): - """Reorder the atomic orbitals from one convention to another. - - For example, src=pyscf dest=gpr reorders p-orbitals from +1,-1,0 (pyscf convention) - to -1,0,+1 (SA-GPR convention). - - Args: - mol (pyscf.gto.Mole): pyscf Mole object. - vector (numpy.ndarray): Vector or matrix to reorder. - src (str): Current convention. Defaults to 'pyscf'. 
- dest (str): Convention to convert to (available: 'pyscf', 'gpr', 'orca'). Defaults to 'gpr'. - - Returns: - numpy.ndarray: Reordered vector or matrix. - - Raises: - NotImplementedError: If the specified convention is not implemented. - ValueError: If vector dimension is not 1 or 2. - """ - - def get_idx(mol, convention): - convention = convention.lower() - if convention == 'gpr': - return np.arange(mol.nao) - elif convention == 'pyscf': - return _pyscf2gpr_idx(mol) - elif convention == 'orca': - return _orca2gpr_idx(mol) - else: - errstr = f'Conversion to/from the {convention} convention is not implemented' - raise NotImplementedError(errstr) - - def get_sign(mol, convention): - convention = convention.lower() - if convention in ['gpr', 'pyscf']: - return np.ones(mol.nao, dtype=int) - elif convention == 'orca': - return _orca2gpr_sign(mol) - - idx_src = get_idx(mol, src) - idx_dest = get_idx(mol, dest) - sign_src = get_sign(mol, src) - sign_dest = get_sign(mol, dest) - - if vector.ndim == 2: - sign_src = np.einsum('i,j->ij', sign_src, sign_src) - sign_dest = np.einsum('i,j->ij', sign_dest, sign_dest) - idx_dest = np.ix_(idx_dest,idx_dest) - idx_src = np.ix_(idx_src,idx_src) - elif vector.ndim!=1: - errstr = f'Dim = {vector.ndim} (should be 1 or 2)' - raise ValueError(errstr) - - newvector = np.zeros_like(vector) - newvector[idx_dest] = (sign_src*vector)[idx_src] - newvector *= sign_dest - - return newvector - - -def _Rz(a): - """Computes the rotation matrix around absolute z-axis. - - Args: - a (float): Rotation angle in radians. - - Returns: - numpy.ndarray: 3x3 rotation matrix. - """ - - A = np.zeros((3,3)) - A[0,0] = np.cos(a) - A[0,1] = -np.sin(a) - A[0,2] = 0 - A[1,0] = np.sin(a) - A[1,1] = np.cos(a) - A[1,2] = 0 - A[2,0] = 0 - A[2,1] = 0 - A[2,2] = 1 - return A - - -def _Ry(b): - """Computes the rotation matrix around absolute y-axis. - - Args: - b (float): Rotation angle in radians. - - Returns: - numpy.ndarray: 3x3 rotation matrix. 
- """ - - A = np.zeros((3,3)) - A[0,0] = np.cos(b) - A[0,1] = 0 - A[0,2] = np.sin(b) - A[1,0] = 0 - A[1,1] = 1 - A[1,2] = 0 - A[2,0] = -np.sin(b) - A[2,1] = 0 - A[2,2] = np.cos(b) - return A - -def _Rx(g): - """Computes the rotation matrix around absolute x-axis. - - Args: - g (float): Rotation angle in radians. - - Returns: - numpy.ndarray: 3x3 rotation matrix. - """ - - A = np.zeros((3,3)) - A[0,0] = 1 - A[0,1] = 0 - A[0,2] = 0 - A[1,0] = 0 - A[1,1] = np.cos(g) - A[1,2] = -np.sin(g) - A[2,0] = 0 - A[2,1] = np.sin(g) - A[2,2] = np.cos(g) - return A - - -def rotate_euler(a, b, g, rad=False): - """Computes the rotation matrix given Euler angles. - - Args: - a (float): Alpha Euler angle. - b (float): Beta Euler angle. - g (float): Gamma Euler angle. - rad (bool): Whether the angles are in radians. Defaults to False (degrees). - - Returns: - numpy.ndarray: 3x3 rotation matrix. - """ - - if not rad: - a = a * np.pi / 180 - b = b * np.pi / 180 - g = g * np.pi / 180 - - A = _Rz(a) - B = _Ry(b) - G = _Rx(g) - - return A@B@G +import argparse def unix_time_decorator(func): @@ -263,6 +27,7 @@ def wrapper(*args, **kwargs): return ret return wrapper + def unix_time_decorator_with_tvalues(func): """Decorator to measure execution time statistics and return them along with function result. @@ -285,6 +50,7 @@ def wrapper(*args, **kwargs): return timing, ret return wrapper + def correct_num_threads(): """Set MKL and OpenBLAS thread counts based on SLURM environment. @@ -294,3 +60,37 @@ def correct_num_threads(): if "SLURM_CPUS_PER_TASK" in os.environ: os.environ["MKL_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] os.environ["OPENBLAS_NUM_THREADS"] = os.environ["SLURM_CPUS_PER_TASK"] + + +class FlexParser(argparse.ArgumentParser): + """Argument parser that allows removing arguments. + + Args: + **kwargs: Arguments passed to ArgumentParser. + + """ + + def remove_argument(self, arg): + """Removes an argument from the parser. 
+ + Utility method for customizing parsers by removing unwanted arguments + from the pre-configured set. Useful when deriving specialized parsers. + + Args: + arg (str): Option destination name. + + Returns: + None: Modifies parser in place. + """ + for action in self._actions: + opts = action.option_strings + if (opts and opts[0] == arg) or action.dest == arg: + self._remove_action(action) + break + + for action in self._action_groups: + for group_action in action._group_actions: + opts = group_action.option_strings + if (opts and opts[0] == arg) or group_action.dest == arg: + action._group_actions.remove(group_action) + return diff --git a/tests/test_orca.py b/tests/test_orca.py index e8330e70..7fbbbc36 100755 --- a/tests/test_orca.py +++ b/tests/test_orca.py @@ -6,7 +6,7 @@ import qstack.orcaio import qstack.compound import qstack.fields -from qstack.tools import reorder_ao +from qstack.reorder import reorder_ao def _dipole_moment(mol, dm): diff --git a/tests/test_reorder.py b/tests/test_reorder.py index 585266b3..501f2dd2 100755 --- a/tests/test_reorder.py +++ b/tests/test_reorder.py @@ -2,7 +2,7 @@ import os import numpy as np -from qstack import compound, tools +from qstack import compound, reorder from qstack.mathutils.matrix import from_tril @@ -11,14 +11,14 @@ def test_reorder_pyscf_gpr(): mol = compound.xyz_to_mol(path+'/data/H2O_dist.xyz', 'cc-pvdz', charge=0, spin=0) dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') - dm1 = tools.reorder_ao(mol, dm, src='pyscf', dest='gpr') - dm2 = tools.reorder_ao(mol, dm1, src='gpr', dest='pyscf') + dm1 = reorder.reorder_ao(mol, dm, src='pyscf', dest='gpr') + dm2 = reorder.reorder_ao(mol, dm1, src='gpr', dest='pyscf') assert(np.linalg.norm(dm-dm2)==0) auxmol = compound.make_auxmol(mol, 'cc-pvdz jkfit') c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') - c1 = tools.reorder_ao(auxmol, c, src='pyscf', dest='gpr') - c2 = tools.reorder_ao(auxmol, c1, src='gpr', dest='pyscf') + c1 = reorder.reorder_ao(auxmol, c, 
src='pyscf', dest='gpr') + c2 = reorder.reorder_ao(auxmol, c1, src='gpr', dest='pyscf') assert(np.linalg.norm(c-c2)==0) @@ -29,19 +29,19 @@ def test_reorder_pyscf_gpr_orca(): dm_gpr = from_tril(np.load(path+'/data/reorder/2_3FOD.gpr.dm.npy')) dm_pyscf = from_tril(np.load(path+'/data/reorder/2_3FOD.pyscf.dm.npy')) - dm_gpr1 = tools.reorder_ao(mol, dm_orca, 'orca', 'gpr') + dm_gpr1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'gpr') assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) - dm_gpr1 = tools.reorder_ao(mol, dm_pyscf, 'pyscf', 'gpr') + dm_gpr1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'gpr') assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) - dm_pyscf1 = tools.reorder_ao(mol, dm_orca, 'orca', 'pyscf') + dm_pyscf1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'pyscf') assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) - dm_pyscf1 = tools.reorder_ao(mol, dm_gpr, 'gpr', 'pyscf') + dm_pyscf1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'pyscf') assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) - dm_orca1 = tools.reorder_ao(mol, dm_pyscf, 'pyscf', 'orca') + dm_orca1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'orca') assert(np.linalg.norm(dm_orca1-dm_orca)==0) - dm_orca1 = tools.reorder_ao(mol, dm_gpr, 'gpr', 'orca') + dm_orca1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'orca') assert(np.linalg.norm(dm_orca1-dm_orca)==0) From a798ea1ff2d1f215390d807e000f0b3b5832b3a0 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Fri, 31 Oct 2025 20:10:42 +0100 Subject: [PATCH 05/23] Check first half of docs --- qstack/basis_opt/basis_tools.py | 25 ++++---- qstack/basis_opt/opt.py | 38 ++++++++---- qstack/c2mio.py | 26 ++++---- qstack/fields/density2file.py | 20 ++----- qstack/fields/hf_otpd.py | 5 +- qstack/mathutils/fps.py | 5 ++ qstack/mathutils/matrix.py | 6 +- qstack/mathutils/rotation_matrix.py | 66 +++++++-------------- qstack/mathutils/wigner.py | 29 +++++---- qstack/regression/condition.py | 12 ++-- qstack/regression/cross_validate_results.py | 16 ++--- qstack/regression/final_error.py | 12 ++-- 
qstack/regression/global_kernels.py | 4 +- qstack/regression/hyperparameters.py | 16 ++--- qstack/regression/kernel.py | 12 ++-- qstack/regression/local_kernels.py | 15 ++++- qstack/regression/oos.py | 12 ++-- qstack/regression/parser.py | 13 ++-- qstack/regression/regression.py | 14 ++--- qstack/spahm/rho/atomic_density.py | 4 +- qstack/spahm/rho/bond_selected.py | 4 +- qstack/spahm/rho/parser.py | 3 +- 22 files changed, 185 insertions(+), 172 deletions(-) diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index 380b2fea..bcb80208 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -4,14 +4,14 @@ def energy_mol(newbasis, moldata): - """Computes energy for basis optimization. + """Computes loss function (fitting error) for one molecule. Args: - newbasis (dict): New basis set definition. - moldata (dict): Dictionary containing molecular data including mol, rho, coords, weights, and self. + newbasis (dict): Basis set. + moldata (dict): Dictionary containing molecular data. Returns: - float: Energy value for the given basis. + float: Loss function value for the given basis. """ mol = moldata['mol' ] rho = moldata['rho' ] @@ -30,18 +30,17 @@ def energy_mol(newbasis, moldata): def gradient_mol(nexp, newbasis, moldata): - """Computes energy and gradient for basis optimization. + """Computes loss function and gradient for one molecule. Args: nexp (int): Number of exponents. - newbasis (dict): New basis set definition. - moldata (dict): Dictionary containing molecular data including mol, rho, coords, weights, - self, idx, centers, and distances. + newbasis (dict): Basis set. + moldata (dict): Dictionary containing molecular data. Returns: tuple: A tuple containing: - - E (float): Energy value. - - dE_da (numpy.ndarray): Gradient of energy with respect to exponents. + - E (float): Loss function value. + - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents. 
""" mol = moldata['mol' ] @@ -96,7 +95,7 @@ def exp2basis(exponents, elements, basis): Args: exponents (numpy.ndarray): Array of basis function exponents. - elements (list): List of element symbols. + elements (list): List of elements for which change the basis. basis (dict): Template basis set definition. Returns: @@ -117,10 +116,10 @@ def cut_myelements(x, myelements, bf_bounds): Args: x (numpy.ndarray): Input array. myelements (list): List of element symbols to extract. - bf_bounds (dict): Dictionary mapping elements to their index bounds. + bf_bounds (dict): Dictionary mapping elements to their basis set bound indices. Returns: - numpy.ndarray: Concatenated array containing only specified elements. + numpy.ndarray: Array containing x only for specified elements. """ x1 = [] for q in myelements: diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index 90278288..2b9b46eb 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -11,13 +11,13 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= """ Optimize a given basis set. Args: - elements_in (str): - basis_in (str or dict): Basis set - molecules_in (dict): which contains the cartesian coordinates of the molecule (string) with the key 'atom', the uncorrelated on-top pair density on a grid (numpy array) with the key 'rho', the grid coordinates (numpy array) with the key 'coords', and the grid weights (numpy array) with the key 'weight'. + elements_in (str): List of elements to optimize. If None, optimize all elements in the basis. + basis_in (list): List of files paths (str) or dicts containing basis set(s). + molecules_in (list): List of file paths (str) or dicts containing molecular data. gtol_in (float): Gradient norm must be less than gtol_in before successful termination (minimization). method_in (str): Type of solver. Check scipy.optimize.minimize for full documentation. 
- printlvl (int): - check (bool): + printlvl (int): Level of printing during optimization (0: none, 1: final basis, 2: detailed). + check (bool): If True, compute and return both analytical and numerical gradients without optimization. Returns: Dictionary containing the optimized basis. @@ -26,13 +26,13 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= def energy(x): - """Compute total energy for given exponents. + """Compute total loss function (fitting error) for given exponents. Args: x (numpy.ndarray): Log of exponents. Returns: - float: Total energy across all molecules. + float: Loss function value. """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) @@ -42,14 +42,14 @@ def energy(x): return E def gradient(x): - """Compute total energy and gradient for given exponents. + """Compute total loss function (fitting error) and gradient for given exponents. Args: x (numpy.ndarray): Log of exponents. Returns: tuple: A tuple containing: - - E (float): Total energy. + - E (float): Loss function value. - dE_dx (numpy.ndarray): Gradient with respect to log(exponents). """ exponents = np.exp(x) @@ -73,8 +73,9 @@ def gradient(x): dE_dx = dE_da * exponents return E, dE_dx + def gradient_only(x): - """Compute only the gradient (wrapper for optimization algorithms). + """Compute only the gradient of the loss function (wrapper for optimization algorithms). Args: x (numpy.ndarray): Log of exponents. @@ -84,6 +85,7 @@ def gradient_only(x): """ return gradient(x)[1] + def read_bases(basis_files): """Read basis set definitions from files or dicts. @@ -112,6 +114,7 @@ def read_bases(basis_files): basis.update(i) return basis + def make_bf_start(): """Create basis function index bounds for each element. @@ -125,14 +128,24 @@ def make_bf_start(): bf_bounds[q] = [start, start+nbf[i]] return bf_bounds + def make_moldata(fname): """Create molecular data dictionary from file or dict. 
Args: - fname (str or dict): Path to .npz file or dictionary containing rho data. + fname (str or dict): Path to .npz file or dictionary containing molecular structure, + grid coordinates and weights, and reference density evaluated on it. Returns: - dict: Dictionary containing mol, rho, coords, weights, self, idx, centers, and distances. + dict: Dictionary containing: + mol (pyscf Mole): pyscf Mole object. + rho (numpy.ndarray): Reference density values on the grid. + coords (numpy.ndarray): Grid coordinates. + weights (numpy.ndarray): Grid weights. + self (float): Integral of the squared reference density. + idx (numpy.ndarray): Basis function indices for each AO. + centers (list): Atomic center indices for each AO. + distances (numpy.ndarray): Squared distances from each atom to each grid point. """ if isinstance(fname, str): rho_data = np.load(fname) @@ -216,6 +229,7 @@ def make_moldata(fname): return newbasis + def main(): """Main function for basis set optimization command-line interface.""" import argparse diff --git a/qstack/c2mio.py b/qstack/c2mio.py index 41eae921..9a4abe08 100644 --- a/qstack/c2mio.py +++ b/qstack/c2mio.py @@ -7,31 +7,37 @@ def get_cell2mol_xyz(mol): - """Extract XYZ coordinates, charge, and spin from a cell2mol molecule object. + """Extract XYZ coordinates, charge, and spin from a cell2mol object. Args: - mol: cell2mol molecule object. + mol: cell2mol molecule or ligand object. Returns: tuple: A tuple containing: - xyz (str): XYZ coordinate string. - charge (int): Total charge of the molecule. - - spin (int): Spin of the molecule (alpha electrons - beta electrons). + - spin (int): Number of unpaired electrons of the molecule (multiplicity - 1) + for molecules and None for ligands. 
""" f = io.StringIO() sys.stdout, stdout = f, sys.stdout mol.print_xyz() xyz, sys.stdout = f.getvalue(), stdout f.close() - return xyz, mol.totcharge, (mol.get_spin()-1 if hasattr(mol, 'get_spin') else 0) + return xyz, mol.totcharge, (mol.get_spin()-1 if hasattr(mol, 'get_spin') else None) def get_cell(fpath, workdir='.'): """Load a unit cell from a .cell or .cif file. + If a .cif file is provided, the function checks for a corresponding .cell file + in the working directory. If it exists, it loads the .cell file; otherwise, it + calls cell2mol to process the .cif file to generate the unit cell. + Args: fpath (str): Path to the input file (.cell or .cif). - workdir (str): Working directory for temporary files. Defaults to '.'. + workdir (str): Directory to read / write .cell file and logs if a .cif file + is provided. Defaults to '.'. Returns: cell2mol.unitcell: Unit cell object. @@ -55,7 +61,7 @@ def get_cell(fpath, workdir='.'): def get_mol(cell, mol_idx=0, basis='minao', ecp=None): - """Extract a pyscf Mole object from a cell2mol unit cell. + """Convert a molecule in a cell2mol unit cell object to a pyscf Mole object. Args: cell: cell2mol unit cell object. @@ -64,7 +70,7 @@ def get_mol(cell, mol_idx=0, basis='minao', ecp=None): ecp (str): Effective core potential. Defaults to None. Returns: - pyscf.gto.Mole: pyscf Mole object containing the molecule information. + pyscf.gto.Mole: pyscf Mole object for the molecule. """ mol = cell.moleclist[mol_idx] xyz, charge, spin = get_cell2mol_xyz(mol) @@ -72,17 +78,17 @@ def get_mol(cell, mol_idx=0, basis='minao', ecp=None): def get_ligand(cell, mol_idx=0, lig_idx=0, basis='minao', ecp=None): - """Extract a ligand as a pyscf Mole object from a cell2mol unit cell. + """Convert a ligand in a cell2mol unit cell object to a pyscf Mole object. Args: cell: cell2mol unit cell object. mol_idx (int): Index of the molecule in the cell. Defaults to 0. - lig_idx (int): Index of the ligand. Defaults to 0. 
+ lig_idx (int): Index of the ligand in the molecule. Defaults to 0. basis (str or dict): Basis set. Defaults to 'minao'. ecp (str): Effective core potential. Defaults to None. Returns: - pyscf.gto.Mole: pyscf Mole object containing the ligand information. + pyscf.gto.Mole: pyscf Mole object for the ligand. """ mol = cell.moleclist[mol_idx].ligands[lig_idx] xyz, charge, spin = get_cell2mol_xyz(mol) diff --git a/qstack/fields/density2file.py b/qstack/fields/density2file.py index 8ad800c8..e3376c84 100644 --- a/qstack/fields/density2file.py +++ b/qstack/fields/density2file.py @@ -4,11 +4,8 @@ import pyscf.tools.molden from .decomposition import number_of_electrons_deco -def coeffs_to_cube(mol, coeffs, cubename, nx = 80, ny = 80, nz = 80, resolution = 0.1, margin = 3.0): - """Saves the electron density to a cube file format. - - Evaluates the density from expansion coefficients on a 3D grid and writes - it to a Gaussian cube file for visualization. +def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, margin=3.0): + """Saves the electron density to a cube file. Args: mol (pyscf Mole): pyscf Mole object. @@ -24,25 +21,16 @@ def coeffs_to_cube(mol, coeffs, cubename, nx = 80, ny = 80, nz = 80, resolution None: Creates a file named .cube on disk. """ - # Make grid grid = Cube(mol, nx, ny, nz, resolution, margin) - - # Compute density on the .cube grid coords = grid.get_coords() - ao = eval_ao(mol, coords) orb_on_grid = np.dot(ao, coeffs) - orb_on_grid = orb_on_grid.reshape(grid.nx,grid.ny,grid.nz) - - # Write out orbital to the .cube file + orb_on_grid = orb_on_grid.reshape(grid.nx, grid.ny, grid.nz) grid.write(orb_on_grid, cubename, comment='Electron Density') def coeffs_to_molden(mol, coeffs, moldenname): - """Saves the electron density to a MOLDEN file format. - - Writes the density represented by expansion coefficients to a MOLDEN file - which can be visualized with various quantum chemistry visualization tools. 
+ """Saves the electron density to a MOLDEN file. Args: mol (pyscf Mole): pyscf Mole object. diff --git a/qstack/fields/hf_otpd.py b/qstack/fields/hf_otpd.py index 58dfb541..88b13ffd 100644 --- a/qstack/fields/hf_otpd.py +++ b/qstack/fields/hf_otpd.py @@ -2,7 +2,8 @@ import numpy as np from .dm import make_grid_for_rho -def hf_otpd(mol, dm, grid_level = 3, save_otpd = False, return_all = False): + +def hf_otpd(mol, dm, grid_level=3, save_otpd=False, return_all=False): """Computes the Hartree-Fock uncorrelated on-top pair density (OTPD) on a grid. The on-top pair density is the probability density of finding two electrons @@ -32,9 +33,9 @@ def hf_otpd(mol, dm, grid_level = 3, save_otpd = False, return_all = False): if return_all: return hf_otpd, grid - return hf_otpd + def save_OTPD(mol, otpd, grid): """Saves on-top pair density computation results to a NumPy compressed file. diff --git a/qstack/mathutils/fps.py b/qstack/mathutils/fps.py index 2da8d6c5..b3cccaab 100644 --- a/qstack/mathutils/fps.py +++ b/qstack/mathutils/fps.py @@ -1,8 +1,13 @@ import numpy as np + def do_fps(x, d=0): """Perform Farthest Point Sampling on a set of points. + Dral P O, Owens A, Yurchenko S N and Thiel W 2017 J. Chem. Phys. 146 244108 doi:10.1063/1.4989536 + Imbalzano G, Anelli A, Giofré D, Klees S, Behler J and Ceriotti M 2018 J. Chem. Phys. 148 241730 doi:10.1063/1.5024611 + Rossi K, Jurásková V, Wischert R, Garel L, Corminboeuf C and Ceriotti M 2020 J. Chem. Theory Comput. 16 5139–49 doi:10.1021/acs.jctc.0c00362 + Code from Giulio Imbalzano. Args: diff --git a/qstack/mathutils/matrix.py b/qstack/mathutils/matrix.py index e3db14ca..b3b615d4 100644 --- a/qstack/mathutils/matrix.py +++ b/qstack/mathutils/matrix.py @@ -1,5 +1,6 @@ import numpy as np + def from_tril(mat_tril): """Restore a symmetric matrix from its lower-triangular form. 
@@ -16,15 +17,16 @@ def from_tril(mat_tril): mat = mat + mat.T - np.diag(np.diag(mat)) return mat + def sqrtm(m, eps=1e-13): """Compute the matrix square root of a symmetric matrix. Args: - m (numpy.ndarray): Symmetric matrix. + m (numpy.ndarray): 2D symmetric matrix. eps (float): Threshold for eigenvalues to be considered zero. Defaults to 1e-13. Returns: - numpy.ndarray: Symmetrized square root of the matrix. + numpy.ndarray: 2D symmetric matrix, the square root of the input matrix. """ e, b = np.linalg.eigh(m) e[abs(e) < eps] = 0.0 diff --git a/qstack/mathutils/rotation_matrix.py b/qstack/mathutils/rotation_matrix.py index eaaf9ea9..49176167 100644 --- a/qstack/mathutils/rotation_matrix.py +++ b/qstack/mathutils/rotation_matrix.py @@ -2,7 +2,7 @@ def _Rz(a): - """Computes the rotation matrix around absolute z-axis. + """Computes the rotation matrix around laboratory z-axis. Args: a (float): Rotation angle in radians. @@ -10,22 +10,16 @@ def _Rz(a): Returns: numpy.ndarray: 3x3 rotation matrix. """ - - A = np.zeros((3,3)) - A[0,0] = np.cos(a) - A[0,1] = -np.sin(a) - A[0,2] = 0 - A[1,0] = np.sin(a) - A[1,1] = np.cos(a) - A[1,2] = 0 - A[2,0] = 0 - A[2,1] = 0 - A[2,2] = 1 - return A + ca, sa = np.cos(a), np.sin(a) + return np.array([ + [ca, -sa, 0], + [sa, ca, 0], + [0, 0, 1] + ]) def _Ry(b): - """Computes the rotation matrix around absolute y-axis. + """Computes the rotation matrix around laboratory y-axis. Args: b (float): Rotation angle in radians. @@ -33,22 +27,16 @@ def _Ry(b): Returns: numpy.ndarray: 3x3 rotation matrix. """ - - A = np.zeros((3,3)) - A[0,0] = np.cos(b) - A[0,1] = 0 - A[0,2] = np.sin(b) - A[1,0] = 0 - A[1,1] = 1 - A[1,2] = 0 - A[2,0] = -np.sin(b) - A[2,1] = 0 - A[2,2] = np.cos(b) - return A + cb, sb = np.cos(b), np.sin(b) + return np.array([ + [ cb, 0, sb], + [ 0, 1, 0 ], + [-sb, 0, cb] + ]) def _Rx(g): - """Computes the rotation matrix around absolute x-axis. + """Computes the rotation matrix around laboratory x-axis. 
Args: g (float): Rotation angle in radians. @@ -56,22 +44,16 @@ def _Rx(g): Returns: numpy.ndarray: 3x3 rotation matrix. """ - - A = np.zeros((3,3)) - A[0,0] = 1 - A[0,1] = 0 - A[0,2] = 0 - A[1,0] = 0 - A[1,1] = np.cos(g) - A[1,2] = -np.sin(g) - A[2,0] = 0 - A[2,1] = np.sin(g) - A[2,2] = np.cos(g) - return A + cg, sg = np.cos(g), np.sin(g) + return np.array([ + [1, 0, 0 ], + [0, cg, -sg], + [0, sg, cg] + ]) def rotate_euler(a, b, g, rad=False): - """Computes the rotation matrix given Euler angles. + """Computes the rotation matrix given Cardan angles (x-y-z) Args: a (float): Alpha Euler angle. @@ -84,9 +66,7 @@ def rotate_euler(a, b, g, rad=False): """ if not rad: - a = a * np.pi / 180 - b = b * np.pi / 180 - g = g * np.pi / 180 + a, b, g = np.radians([a, b, g]) A = _Rz(a) B = _Ry(b) diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index 5a269da8..171dbeab 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -14,10 +14,11 @@ yx,yy,yz = symbols('yx yy yz') zx,zy,zz = symbols('zx zy zz') + def real_Y_correct_phase(l, m, theta, phi): - """Returns real spherical harmonic in Condon-Shortley phase convention. - - Note: sympy's Ynm uses a different convention. + """Returns real spherical harmonic in Condon--Shortley phase convention. + + Note: sympy's Znm uses a different convention. Args: l (int): Orbital angular momentum quantum number. @@ -37,6 +38,7 @@ def real_Y_correct_phase(l, m, theta, phi): elif m>0: return 1 / sp.sqrt(2) * (ym1 + ym2) + def get_polynom_Y(l, m): """Rewrites a real spherical harmonic as a polynomial of x, y, z. @@ -62,9 +64,13 @@ def get_polynom_Y(l, m): 3*x*x+3*y*y : 3-3*z*z }) return expr + def xyzint_wrapper(knm, integrals_xyz_dict): """Wrapper for xyz integrals with caching. + Computes the integral of x^k * y^n * z^m over the unit sphere. + Integral is zero if any power is odd. + Args: knm (tuple): Tuple of three integers (k, n, m) representing powers. 
integrals_xyz_dict (dict): Cache dictionary for computed integrals. @@ -81,6 +87,7 @@ def xyzint_wrapper(knm, integrals_xyz_dict): integrals_xyz_dict[knm] = xyzint(*knm) return integrals_xyz_dict[knm] + def product_Y(Y1,Y2): """Computes the product of two spherical harmonics. @@ -111,8 +118,10 @@ def print_wigner(D): print(f'D[{l}][{m1: d},{m2: d}] = {d[m1,m2]}') print() + def compute_wigner(lmax): - """Compute Wigner D matrices up to a maximum angular momentum. + """Compute Wigner D matrices for real spherical harmonics + up to a maximum angular momentum. Args: lmax (int): Maximum angular momentum quantum number. @@ -142,11 +151,11 @@ def compute_wigner(lmax): if __name__ == "__main__": - if len(sys.argv)<2: - lmax = 2 - else: - lmax = int(sys.argv[1]) + if len(sys.argv)<2: + lmax = 2 + else: + lmax = int(sys.argv[1]) - D = compute_wigner(lmax) - print_wigner(D) + D = compute_wigner(lmax) + print_wigner(D) diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index 0ce74d0b..795750d4 100644 --- a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -12,17 +12,17 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, """ Compute kernel matrix condition number Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples + X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples read_kernel (bool): if 'X' is a kernel and not an array of representations sigma (float): width of the kernel eta (float): regularization strength for matrix inversion - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels test_size (float or int): test set fraction (or number of 
samples) random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) + idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) + idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) sparse (int): the number of reference environnments to consider for sparse regression Returns: diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index b11b4288..22fbaf00 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -8,8 +8,8 @@ def cv_results(X, y, - sigmaarr=defaults.sigmaarr, etaarr=defaults.etaarr, gkernel=defaults.gkernel, - gdict=defaults.gdict, akernel=defaults.kernel, test_size=defaults.test_size, + sigmaarr=defaults.sigmaarr, etaarr=defaults.etaarr, akernel=defaults.kernel, + gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, train_size=defaults.train_size, splits=defaults.splits, printlevel=0, adaptive=False, read_kernel=False, n_rep=defaults.n_rep, save=False, preffix='unknown', save_pred=False, progress=False, sparse=None, @@ -17,13 +17,13 @@ def cv_results(X, y, """ Computes various learning curves (LC) ,with random sampling, and returns the average performance. 
Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples + X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - sigmaar (list): list of kernel widths for the hyperparameter optimization - etaar (list): list of regularization strength for the hyperparameter optimization - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - akernel (str): local kernel (Laplacian, Gaussian, linear) + sigmaarr (list): list of kernel width for the grid search + etaarr (list): list of regularization strength for the grid search + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels test_size (float or int): test set fraction (or number of samples) train_size (list): list of training set size fractions used to evaluate the points on the LC splits (int): K number of splits for the Kfold cross-validation diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 68316be0..270f2548 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -15,18 +15,18 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, """ Perform prediction on the test set using the full training set. 
Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples + X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples read_kernel (bool): if 'X' is a kernel and not an array of representations sigma (float): width of the kernel eta (float): regularization strength for matrix inversion - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels test_size (float or int): test set fraction (or number of samples) random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) + idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) + idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) sparse (int): the number of reference environnments to consider for sparse regression return_pred (bool) : return predictions return_alpha (bool) : return regression weights diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index 2ae4cebb..f4d0fe78 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -13,7 +13,7 @@ def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): sigma (float): Kernel width parameter. local_kernel (callable): Local kernel function for atomic environments. global_kernel (callable): Global kernel function for combining local kernels. 
- options (dict): Dictionary of kernel options (normalize, verbose, etc.). + options (dict): Dictionary of global kernel options. Returns: numpy ndarray: Global kernel matrix of shape (len(X), len(Y)). @@ -67,7 +67,7 @@ def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None) Args: mol1 (dict): First molecule represented as dictionary of atomic environments by species. mol2 (dict): Second molecule represented as dictionary of atomic environments by species. - species (numpy ndarray): Array of unique atomic species present. + species (numpy ndarray): Array of unique atomic species present in the dataset. max_atoms (dict): Maximum number of atoms per species across all molecules. max_size (int): Total size of the padded covariance matrix. kernel (callable): Local kernel function. diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index ec115825..eabd4238 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -9,23 +9,23 @@ def hyperparameters(X, y, - sigma=defaults.sigmaarr, eta=defaults.etaarr, gkernel=defaults.gkernel, gdict=defaults.gdict, - akernel=defaults.kernel, test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, + sigma=defaults.sigmaarr, eta=defaults.etaarr, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, + test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, printlevel=0, adaptive=False, read_kernel=False, sparse=None, random_state=defaults.random_state): """ Performs a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). 
Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples + X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples sigma (list): list of kernel width for the grid search eta (list): list of regularization strength for the grid search - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels - akernel (str): local kernel (Laplacian, Gaussian, linear) + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels test_size (float or int): test set fraction (or number of samples) splits (int): K number of splits for the Kfold cross-validation - idx_test (list): list of indices for the test-set (based on the sequence in X - idx_train (list): list of indices for the training set (based on the sequence in X) + idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) + idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) printlevel (int): controls level of output printing adaptative (bool): to expand the grid search adaptatively read_kernel (bool): if 'X' is a kernel and not an array of representations diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index c58c0438..1b2e6157 100644 --- a/qstack/regression/kernel.py +++ b/qstack/regression/kernel.py @@ -9,12 +9,12 @@ def kernel(X, Y=None, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=def """ Computes a kernel between sets A and B (or A and A) using their representations. Args: - X (list of arrays): Representation of A - Y (list of arrays): Representation of B. - sigma (): Sigma hyperparameter. - akernel (): Kernel type (G for Gaussian, L for Laplacian, and myL for Laplacian for open-shell systems). 
- gkernel (): Global kernel type (agv for average, rem for REMatch kernel, None for local kernels). - gdict (): Dictionary like input string to initialize global kernel parameters. Defaults to {'alpha':1.0, 'normalize':1}. + X (numpy.ndarray): Representation of A + Y (numpy.ndarray): Representation of B. + sigma (float): width of the kernel + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels Returns: A numpy ndarray containing the kernel. diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index 9ab85075..d1419d25 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -123,8 +123,19 @@ def cosine_similarity_wrapper(x, y, *_kargs, **_kwargs): def local_laplacian_kernel_wrapper(X, Y, gamma): - """ Wrapper that acts as a generic laplacian kernel function - It simply decides which kernel implementation to call. + """ Wrapper that acts as a generic Laplacian kernel function. + It decides which kernel implementation to call. + + Args: + X (numpy ndarray): First set of samples (can be multi-dimensional). + Y (numpy ndarray): Second set of samples. + gamma (float): Kernel width parameter. + + Returns: + numpy ndarray: Laplacian kernel matrix of shape (len(X), len(Y)). + + Raises: + RuntimeError: If X and Y have incompatible shapes. """ X, Y = np.asarray(X), np.asarray(Y) if X.shape[1:] != Y.shape[1:]: diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 1341a049..8dcc7a64 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -13,13 +13,13 @@ def oos(X, X_oos, alpha, sigma=defaults.sigma, """ Perform prediction on an out-of-sample (OOS) set. Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples - X_oos (numpy.2darray[Noos,Nfeat]): array of OOS representations. 
- alpha (numpy.1darray(Ntrain or sparse)): regression weights. + X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples + X_oos (numpy.ndarray[Noos,...]): array of OOS representations + alpha (numpy.1darray(Ntrain or sparse)): regression weights sigma (float): width of the kernel - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels test_size (float or int): test set fraction (or number of samples) random_state (int): the seed used for random number generator (controls train/test splitting) idx_test (list): list of indices for the test set (based on the sequence in X) diff --git a/qstack/regression/parser.py b/qstack/regression/parser.py index a3e63814..79a94047 100644 --- a/qstack/regression/parser.py +++ b/qstack/regression/parser.py @@ -6,9 +6,7 @@ class RegressionParser(FlexParser): """Custom argument parser for kernel ridge regression tasks. - Provides pre-configured argument sets for machine learning workflows with - molecular representations. Supports single hyperparameter evaluation and - hyperparameter optimization via grid search. + Provides pre-configured argument sets for KRR routines. Args: hyperparameters_set (str, optional): Hyperparameter mode. Options: @@ -23,13 +21,14 @@ class RegressionParser(FlexParser): - x (--x): Path to molecular representations file - y (--y): Path to target properties file - akernel (--akernel): Local/atomic kernel type (Gaussian, Laplacian, etc.) 
- - gkernel (--gkernel): Global/molecular kernel type (avg, REMatch) + - gkernel (--gkernel): Global/molecular kernel type (average, REMatch) - gdict (--gdict): Global kernel parameters dictionary - test (--test): Test set fraction (0.0-1.0) - - train (--train): Training set size(s) - - ll (--ll): Thread correction flag + - train (--train): Training set fraction list for learning curvers + (0.0-1.0 where 1.0 means full training set minus test set) + - ll (--ll): Thread correction flag for running on clusters - readkernel (--readkernel): Flag if input is pre-computed kernel - - sparse (--sparse): Sparse learning basis size + - sparse (--sparse): Sparse regression basis size - random_state (--random_state): Random seed for reproducibility Additional for 'single' mode: diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index f8a12823..16908f17 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -14,22 +14,22 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, """ Produces learning curves (LC) data, for various training sizes, using kernel ridge regression and the user specified parameters Args: - X (numpy.2darray[Nsamples,Nfeat]): array containing the 1D representations of all Nsamples + X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples read_kernel (bool): if 'X' is a kernel and not an array of representations sigma (float): width of the kernel eta (float): regularization strength for matrix inversion - akernel (str): local kernel (Laplacian, Gaussian, linear) - gkernel (str): global kernel (REM, average) - gdit (dict): parameters of the global kernels + akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') + gkernel (str): global kernel (None, 'REM', 'avg') + gdict (dict): parameters of the global kernels test_size (float or int): 
test set fraction (or number of samples) train_size (list): list of training set size fractions used to evaluate the points on the LC n_rep (int): the number of repetition for each point (using random sampling) random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) + idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) + idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) sparse (int): the number of reference environnments to consider for sparse regression - debug (bool): to use a fixed seed for random sampling (for reproducibility) + debug (bool): to use a fixed seed for partial training set selection (for reproducibility) save_pred (bool): to return all predicted targets Returns: diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index 3ced2c97..447859f0 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -4,10 +4,10 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): - """Fits atomic density matrices using Löwdin partitioning and density fitting. + """Creates atomic density representations using Löwdin partitioning and density fitting. Decomposes the molecular density matrix into atomic contributions using Löwdin - orthogonalization, then fits each atomic density with auxiliary basis functions. + orthogonalization, then fits each atomic density onto auxiliary basis set. Args: mol (pyscf Mole): pyscf Mole object. 
diff --git a/qstack/spahm/rho/bond_selected.py b/qstack/spahm/rho/bond_selected.py index e2ba913c..277fe6c0 100644 --- a/qstack/spahm/rho/bond_selected.py +++ b/qstack/spahm/rho/bond_selected.py @@ -22,10 +22,10 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, readdm (str, optional): Directory to load pre-computed density matrices. Defaults to None. guess (str): Guess Hamiltonian method name. Defaults to defaults.guess. xc (str): Exchange-correlation functional. Defaults to defaults.xc. - spin (numpy ndarray, optional): Array of spin multiplicities per molecule. Defaults to None. + spin (numpy ndarray, optional): Array of numbers of unpaired electrons per molecule. Defaults to None. cutoff (float): Maximum bond distance in Angstrom. Defaults to defaults.cutoff. printlevel (int): Verbosity level. Defaults to 0. - omods (list): Open-shell modes ('alpha', 'beta'). Defaults to defaults.omod. + omods (list): Open-shell modes (e.g. 'alpha', 'beta'). Defaults to defaults.omod. bpath (str): Path to bond basis set directory. Defaults to defaults.bpath. only_m0 (bool): Use only m=0 basis functions. Defaults to False. same_basis (bool): Use generic CC.bas for all pairs. Defaults to False. diff --git a/qstack/spahm/rho/parser.py b/qstack/spahm/rho/parser.py index 367ecd28..931d5890 100644 --- a/qstack/spahm/rho/parser.py +++ b/qstack/spahm/rho/parser.py @@ -8,8 +8,7 @@ class SpahmParser(FlexParser): """Custom argument parser for SPAHM command-line tools. - Provides pre-configured argument sets for atomic and bond SPAHM computations - with consistent interface across different entry points. + Provides pre-configured argument sets for atomic and bond SPAHM computations. Args: unified (bool): Enable unified file/list interface. Defaults to False. 
From f9fd0aaf1c6c6fed109863a1c66b4ce0ddf95f17 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sun, 2 Nov 2025 18:47:44 +0100 Subject: [PATCH 06/23] Refactor compound.py --- qstack/c2mio.py | 4 +- qstack/compound.py | 108 ++++++++++++++--------------------------- tests/test_compound.py | 15 ++++-- 3 files changed, 50 insertions(+), 77 deletions(-) diff --git a/qstack/c2mio.py b/qstack/c2mio.py index 9a4abe08..78df4bd4 100644 --- a/qstack/c2mio.py +++ b/qstack/c2mio.py @@ -74,7 +74,7 @@ def get_mol(cell, mol_idx=0, basis='minao', ecp=None): """ mol = cell.moleclist[mol_idx] xyz, charge, spin = get_cell2mol_xyz(mol) - return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True) + return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp) def get_ligand(cell, mol_idx=0, lig_idx=0, basis='minao', ecp=None): @@ -92,4 +92,4 @@ def get_ligand(cell, mol_idx=0, lig_idx=0, basis='minao', ecp=None): """ mol = cell.moleclist[mol_idx].ligands[lig_idx] xyz, charge, spin = get_cell2mol_xyz(mol) - return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp, read_string=True) + return xyz_to_mol(xyz, charge=charge, spin=spin, basis=basis, ecp=ecp) diff --git a/qstack/compound.py b/qstack/compound.py index f79c073e..f0446373 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -1,7 +1,3 @@ -""" -Module containing all the operations to load, transform, and save molecular objects. 
-""" - import json import re import warnings @@ -11,10 +7,8 @@ from qstack.mathutils.rotation_matrix import rotate_euler - # detects a charge-spin line, containing only two ints (one positive or negative, the other positive and nonzero) _re_spincharge = re.compile(r'(?P[-+]?[0-9]+)\s+(?P[1-9][0-9]*)') - # fetches a single key=value or key:value pair, then matches a full line, for space-separated pairs _re_singlekey = re.compile(r'\s*(?P\w+)[=:](?P[^\s,]+)\s*') _re_keyline = re.compile(r'\s*(\w+[=:][^\s,]+\s+)*(\w+[=:][^\s,]+)\s*') @@ -40,14 +34,14 @@ def xyz_comment_line_parser(line): return {} elif _re_spincharge.fullmatch(line): # possibility 1: the line only has charge and spin multiplicity + # note: this skips the futher processing matcher = _re_spincharge.fullmatch(line) spinmult = int(matcher.group('spinmult')) charge = int(matcher.group('charge')) - # note: this skips the futher processing return {'charge':charge, 'spin':spinmult-1} elif _re_keyline.fullmatch(line): # possibility 2: space-separated key/value pairs - line_parts = line.split() # split across any whitespace + line_parts = line.split() part_matching = _re_singlekey props = {} elif _re_keyline2.fullmatch(line): @@ -84,17 +78,17 @@ def xyz_comment_line_parser(line): val = float(val) props[part_matcher.group('key')] = val - if 'spin' in props: # we want a difference in electons (alpha-beta), but we expect the file to contain a spin multiplicity props['spin'] = props['spin']-1 return props -def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit=None, ecp=None, parse_comment=False, read_string=False): + +def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit=None, ecp=None, parse_comment=False): """Reads a molecular file in xyz format and returns a pyscf Mole object. Args: - inp (str): Path of the xyz file to read, or xyz file contents if read_string==True. + inp (str): Path of the xyz file to read, or xyz file contents. 
basis (str or dict): Basis set. Defaults to "def2-svp". charge (int): Provide/override charge of the molecule. Defaults to None. spin (int): Provide/override spin of the molecule (alpha electrons - beta electrons). Defaults to None. @@ -102,7 +96,6 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit unit (str): Provide/override units (Ang or Bohr). Defaults to None. ecp (str): ECP to use. Defaults to None. parse_comment (bool): Whether to parse the comment line for properties. Defaults to False. - read_string (bool): Whether inp is a string containing xyz data rather than a file path. Defaults to False. Returns: pyscf.gto.Mole: pyscf Mole object containing the molecule information. @@ -111,28 +104,27 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit RuntimeError: If units are not recognized or if minao basis requires ECP for heavy atoms. """ - if read_string: + if '\n' in inp: molxyz = gto.fromstring(inp) else: molxyz = gto.fromfile(inp) if parse_comment: - if read_string: + if '\n' in inp: comment_line = inp.split('\n')[1] else: with open(inp) as f: - _ = f.readline() - comment_line = f.readline() + _, comment_line = f.readline(), f.readline() props = xyz_comment_line_parser(comment_line) else: - props = [None] + props = {} - # Define attributes to the Mole object and build it mol = gto.Mole() mol.atom = molxyz mol.basis = basis + if ecp is not None: + mol.ecp = ecp - # Check the units for the pyscf driver if unit is not None: pass elif 'unit' in props: @@ -141,7 +133,7 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit unit = 'Angstrom' unit = unit.upper()[0] if unit not in ['B', 'A']: - raise RuntimeError("Unknown units (use Ängstrom or Bohr)") + raise RuntimeError("Unknown units (use A[ngstrom] or B[ohr])") mol.unit = unit if ignore: @@ -155,8 +147,6 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit elif 'charge' in props: 
mol.charge = props['charge'] else: - # no ignore, no charge/spin specified: - # let's hope we have a set of neutral, closed shell compounds! mol.charge = 0 if spin is not None: @@ -166,9 +156,6 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit else: mol.spin = 0 - if ecp is not None: - mol.ecp = ecp - mol.build() species_charges = [data.elements.charge(z) for z in mol.elements] if mol.basis == 'minao' and ecp is None and (np.array(species_charges) > 36).any(): @@ -193,26 +180,18 @@ def mol_to_xyz(mol, fout, fmt="xyz"): """ fmt = fmt.lower() + output = [] if fmt == "xyz": coords = mol.atom_coords() * constants.BOHR2ANGS - output = [] - if fmt == "xyz": - output.append(str(mol.natm)) - output.append(f"{mol.charge} {mol.multiplicity}") - - for i in range(mol.natm): - symb = mol.atom_pure_symbol(i) - x, y, z = coords[i] - output.append(f"{symb:4s} {x:14.5f} {y:14.5f} {z:14.5f}") - string = "\n".join(output) - + output.append(f"{mol.natm}\n{mol.charge} {mol.multiplicity}") + output.extend([f"{mol.atom_pure_symbol(i):4s} {r[0]:14.5f} {r[1]:14.5f} {r[2]:14.5f}" for i, r in enumerate(coords)]) + output = "\n".join(output) else: raise NotImplementedError with open(fout, "w") as f: - f.write(string) - f.write("\n") - return string + f.write(output+"\n") + return output def make_auxmol(mol, basis, copy_ecp=False): @@ -226,8 +205,6 @@ def make_auxmol(mol, basis, copy_ecp=False): Returns: pyscf.gto.Mole: Auxiliary pyscf Mole object. """ - - # Define attributes to the auxiliary Mole object and build it auxmol = gto.Mole() auxmol.atom = mol.atom auxmol.charge = mol.charge @@ -236,12 +213,11 @@ def make_auxmol(mol, basis, copy_ecp=False): if copy_ecp: auxmol.ecp = mol.ecp auxmol.build() - return auxmol def rotate_molecule(mol, a, b, g, rad=False): - """Rotate a molecule: transform nuclear coordinates given a set of Euler angles. + """Rotate a molecule: transform nuclear coordinates given a set of Cardan angles. 
Args: mol (pyscf.gto.Mole): Original pyscf Mole object. @@ -253,27 +229,22 @@ def rotate_molecule(mol, a, b, g, rad=False): Returns: pyscf.gto.Mole: pyscf Mole object with transformed coordinates. """ - - orig_coords = mol.atom_coords() - rotated_coords = orig_coords @ rotate_euler(a, b, g, rad) * constants.BOHR2ANGS - atom_types = mol.elements - + rotated_coords = mol.atom_coords() @ rotate_euler(a, b, g, rad) * constants.BOHR2ANGS rotated_mol = gto.Mole() - rotated_mol.atom = list(zip(atom_types, rotated_coords.tolist(), strict=True)) + rotated_mol.atom = [*zip(mol.elements, rotated_coords, strict=True)] rotated_mol.charge = mol.charge rotated_mol.spin = mol.spin rotated_mol.basis = mol.basis + rotated_mol.ecp = mol.ecp rotated_mol.build() - return rotated_mol - def fragments_read(frag_file): - """Loads fragment definition from a frag file. + """Loads fragment definition from a file. Args: - frag_file (str): Name (including path) of the frag file to read. + frag_file (str): Path to the fragment file containing space-separated atom indices (1-based). Returns: list: List of numpy arrays containing the fragment indices. @@ -282,48 +253,38 @@ def fragments_read(frag_file): fragments = [np.fromstring(line, dtype=int, sep=' ')-1 for line in f] return fragments + def fragment_partitioning(fragments, prop_atom_inp, normalize=True): """Computes the contribution of each fragment. Args: fragments (list): Fragment definition as list of numpy arrays. - prop_atom_inp (list or numpy.ndarray): Coefficients densities, either as list of arrays or single array. + prop_atom_inp (numpy.ndarray or list of numpy.ndarray): Atomic contributions to property(ies). normalize (bool): Whether to normalize fragment partitioning. Defaults to True. Returns: list or numpy.ndarray: Contribution of each fragment. Returns list if input was list, array otherwise. 
""" - if type(prop_atom_inp) is list: - props_atom = prop_atom_inp - else: - props_atom = [prop_atom_inp] + props_atom = prop_atom_inp if type(prop_atom_inp) is list else [prop_atom_inp] props_frag = [] for prop_atom in props_atom: - prop_frag = np.zeros(len(fragments)) - for i, k in enumerate(fragments): - prop_frag[i] = prop_atom[k].sum() - prop_frag[i] = prop_atom[k].sum() + prop_frag = np.array([prop_atom[k].sum() for i, k in enumerate(fragments)]) + if normalize: + prop_frag *= 100.0 / prop_frag.sum() props_frag.append(prop_frag) - if normalize: - for i, prop_frag in enumerate(props_frag): - tot = prop_frag.sum() - props_frag[i] *= 100.0 / tot - - if type(prop_atom_inp) is list: - return props_frag - else: - return props_frag[0] + return props_frag if type(prop_atom_inp) is list else props_frag[0] -def make_atom(q, basis): +def make_atom(q, basis, ecp=None): """Create a single-atom molecule at the origin. Args: q (str): Element symbol. basis (str or dict): Basis set. + ecp (str): ECP to use. Defaults to None. Returns: pyscf.gto.Mole: Single-atom pyscf Mole object. @@ -333,9 +294,12 @@ def make_atom(q, basis): mol.charge = 0 mol.spin = data.elements.ELEMENTS_PROTON[q] % 2 mol.basis = basis + if ecp is not None: + mol.ecp = ecp mol.build() return mol + def singleatom_basis_enumerator(basis): """Enumerates the different tensors of atomic orbitals within a 1-atom basis set. 
diff --git a/tests/test_compound.py b/tests/test_compound.py index 1e3f7313..a5ec367b 100755 --- a/tests/test_compound.py +++ b/tests/test_compound.py @@ -14,6 +14,7 @@ def test_reader(): assert mol.elements == ['O', 'H', 'H'] assert np.linalg.norm(mol.atom_coords()-check_atom_coord) < 1e-8 + def test_makeauxmol(): path = os.path.dirname(os.path.realpath(__file__)) mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) @@ -24,6 +25,7 @@ def test_makeauxmol(): assert type(auxmol.elements) is type([]) assert auxmol.basis == "cc-pvtz-jkfit" + def test_rotate_molecule(): path = os.path.dirname(os.path.realpath(__file__)) mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) @@ -31,22 +33,29 @@ def test_rotate_molecule(): rotated = compound.rotate_molecule(mol, 90, 0, 0) assert np.linalg.norm(rotated_mol.atom_coords()-rotated.atom_coords()) < 1e-10 + def test_mol_to_xyz(): path = os.path.dirname(os.path.realpath(__file__)) - mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) - compound.mol_to_xyz(mol, path+'/data/H2O_saved.xyz') + molpath = path+'/data/H2O_saved.xyz' + with open(molpath, 'r') as f: + xyz0 = f.read().strip() + mol = compound.xyz_to_mol(molpath, 'def2svp', charge=0, spin=0) + xyz = compound.mol_to_xyz(mol, '/dev/null') + assert np.all(xyz == xyz0) + def test_commentline(): path = os.path.dirname(os.path.realpath(__file__)) names = ["HO_json.xyz", "HO_keyvalcomma.xyz", "HO_keyvalspace.xyz", "HO_spinline.xyz"] for name in names: - print(name) mol = compound.xyz_to_mol(os.path.join(path,'data',name), 'def2svp', parse_comment=True) assert mol.spin == 0 assert mol.charge == -1 + if __name__ == '__main__': test_reader() test_makeauxmol() test_rotate_molecule() test_mol_to_xyz() + test_commentline() From b3889d18930880fa3c4ab8c92870f8a72cc5d7be Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sun, 2 Nov 2025 19:12:21 +0100 Subject: [PATCH 07/23] Refactor dm.py --- qstack/fields/dm.py | 64 
++++++++++++--------------- qstack/spahm/rho/compute_rho_spahm.py | 35 +++++++-------- 2 files changed, 46 insertions(+), 53 deletions(-) diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index 4d59494a..f91756c4 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -2,16 +2,20 @@ from qstack import constants import numpy as np -def get_converged_dm(mol, xc, verbose=False): - """Performs restricted SCF and returns density matrix, given pyscf mol object and an XC density functional. + +def get_converged_mf(mol, xc, dm0=None, verbose=False): + """Performs SCF calculation. Args: mol (pyscf Mole): pyscf Mole object. xc (str): Exchange-correlation functional. - verbose (bool): If print more info + dm0 (numpy ndarray, optional): Initial guess for density matrix. Defaults to None. + verbose (bool): If print more information. Returns: - A numpy ndarray containing the density matrix in AO-basis. + tuple: A tuple containing: + - mf (pyscf.dft.rks.RKS or pyscf.dft.uks.UKS): Converged mean-field object. + - dm (numpy ndarray): Converged density matrix in AO-basis. """ if mol.multiplicity == 1: @@ -21,20 +25,34 @@ def get_converged_dm(mol, xc, verbose=False): mf.xc = xc if verbose: - print("Starting Kohn-Sham computation at "+str(mf.xc)+"/"+str(mol.basis)+" level.") + print(f"Starting Kohn-Sham computation at {mf.xc}/{mol.basis} level.") mf.verbose = 1 - mf.kernel() + mf.kernel(dm0=dm0) if verbose: - print("Convergence: ",mf.converged) - print("Energy: ",mf.e_tot) + print(f"Convergence: {mf.converged}") + print(f"Energy: {mf.e_tot}") - # Make the one-particle density matrix in ao-basis dm = mf.make_rdm1() + return (mf, dm) + + +def get_converged_dm(mol, xc, verbose=False): + """Wrapper around get_converged_mf to get the DM. + + Args: + mol (pyscf Mole): pyscf Mole object. + xc (str): Exchange-correlation functional. + verbose (bool): If print more information. + + Returns: + A numpy ndarray containing the density matrix in AO-basis. 
+ """ + + return get_converged_mf(mol, xc, dm0=None, verbose=verbose)[1] - return dm -def make_grid_for_rho(mol, grid_level = 3): +def make_grid_for_rho(mol, grid_level=3): """Generates a grid of real space coordinates and weights for integration. Args: @@ -48,7 +66,6 @@ def make_grid_for_rho(mol, grid_level = 3): grid = dft.gen_grid.Grids(mol) grid.level = grid_level grid.build() - return grid def sphericalize_density_matrix(mol, dm): @@ -87,26 +104,3 @@ def sphericalize_density_matrix(mol, dm): return spherical_dm -def get_converged_mf(mol, func, dm0=None): - """Performs SCF calculation and returns both the mean-field object and density matrix. - - Args: - mol (pyscf Mole): pyscf Mole object. - func (str): Exchange-correlation functional. - dm0 (numpy ndarray, optional): Initial guess for density matrix. Defaults to None. - - Returns: - tuple: A tuple containing: - - mf (pyscf.dft.rks.RKS or pyscf.dft.uks.UKS): Converged mean-field object. - - dm (numpy ndarray): Converged density matrix in AO-basis. - """ - - if mol.multiplicity == 1: - mf = dft.RKS(mol) - else: - mf = dft.UKS(mol) - mf.xc = func - mf.kernel(dm0=dm0) - dm = mf.make_rdm1() - return (mf, dm) - diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 3533983c..147f26b4 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -15,35 +15,32 @@ def spahm_a_b(rep_type, mols, dms, pairfile=None, dump_and_exit=False, same_basis=False, only_z=None): """Computes SPAHM(a) or SPAHM(b) representations for a set of molecules. - Core computation function that generates atom-centered or bond-centered - molecular representations using density fitting on guess Hamiltonians. - Args: rep_type (str): Representation type: 'atom' for SPAHM(a) or 'bond' for SPAHM(b). mols (list): List of pyscf Mole objects. dms (list): List of density matrices (2D or 3D numpy arrays) for each molecule. 
- bpath (str): Directory path containing bond-optimized basis files (.bas). Defaults to defaults.bpath. + bpath (str): Directory path containing bond-optimized basis files (.bas) for SPAHM(b). Defaults to defaults.bpath. cutoff (float): Bond cutoff distance in Angstrom for SPAHM(b). Defaults to defaults.cutoff. omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. elements (list, optional): Element symbols present in dataset. Auto-detected if None. Defaults to None. - only_m0 (bool): Use only m=0 angular momentum components. Defaults to False. + only_m0 (bool): Use only m=0 angular momentum component for SPAHM(b). Defaults to False. zeros (bool): Pad with zeros for non-existent bond pairs in SPAHM(b). Defaults to False. printlevel (int): Verbosity level (0=silent, >0=verbose). Defaults to 0. auxbasis (str): Auxiliary basis set for SPAHM(a). Defaults to defaults.auxbasis. model (str): Atomic density fitting model for SPAHM(a). Defaults to defaults.model. - pairfile (str, optional): Path to atom pair file for SPAHM(b). Auto-detected if None. Defaults to None. - dump_and_exit (bool): Save pairfile and exit without computing. Defaults to False. - same_basis (bool): Use generic CC.bas for all atom pairs. Defaults to False. + pairfile (str, optional): Path to atom pair file for SPAHM(b). Atom pairs are computed from mols if None. Defaults to None. + dump_and_exit (bool): Save atom pair file for SPAHM(b) to pairfile and exit without computing. Defaults to False. + same_basis (bool): Use generic CC.bas for all atom pairs for SPAHM(b). Defaults to False. only_z (list, optional): Restrict to specific atom types. Defaults to None. 
Returns: numpy ndarray: 4D array (n_omods, n_mols, max_atoms, n_features) where: - - n_omods: Number of open-shell components (1 for closed-shell, 2 for UHF) + - n_omods: Number of open-shell components (1 for closed-shell, len(omods) for open-shell) - n_mols: Number of molecules in dataset - max_atoms: Maximum number of atoms/bonds across all molecules - n_features: Representation dimension """ - maxlen = 0 # This needs fixing `UnboundLocalError` + maxlen = 0 if only_z is None: only_z = [] if rep_type == 'bond': @@ -94,6 +91,7 @@ def spahm_a_b(rep_type, mols, dms, return allvec + def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, pairfile=None, dump_and_exit=False, same_basis=True, bpath=defaults.bpath, cutoff=defaults.cutoff, omods=defaults.omod, @@ -109,23 +107,23 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= rep_type (str): Representation type ('atom' or 'bond'). mols (list): List of pyscf Mole objects. xyzlist (list): List of XYZ file paths corresponding to mols. - guess (str): Guess Hamiltonian method name. + guess (str): Guess Hamiltonian name. xc (str): Exchange-correlation functional. Defaults to defaults.xc. spin (list, optional): List of spin multiplicities per molecule. Defaults to None. readdm (str, optional): Directory path to load pre-computed density matrices. Defaults to None. pairfile (str, optional): Path to atom pair file for SPAHM(b). Defaults to None. - dump_and_exit (bool): Save pairfile and exit without computing. Defaults to False. - same_basis (bool): Use generic CC.bas for all atom pairs. Defaults to True. - bpath (str): Directory with bond-optimized basis files. Defaults to defaults.bpath. - cutoff (float): Bond cutoff distance in Angstrom. Defaults to defaults.cutoff. + dump_and_exit (bool): Save atom pair file for SPAHM(b) to pairfile and exit without computing. Defaults to False. + same_basis (bool): Use generic CC.bas for all atom pairs for SPAHM(b). 
Defaults to True. + bpath (str): Directory path containing bond-optimized basis files (.bas) for SPAHM(b). Defaults to defaults.bpath. + cutoff (float): Bond cutoff distance in Angstrom for SPAHM(b). Defaults to defaults.cutoff. omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. elements (list, optional): Element symbols in dataset. Auto-detected if None. Defaults to None. - only_m0 (bool): Use only m=0 basis functions. Defaults to False. - zeros (bool): Pad with zeros for non-existent bonds. Defaults to False. + only_m0 (bool): Use only m=0 angular momentum component for SPAHM(b). Defaults to False. + zeros (bool): Pad with zeros for non-existent bond pairs in SPAHM(b). Defaults to False. split (bool): Split output by molecule. Defaults to False. printlevel (int): Verbosity level. Defaults to 0. auxbasis (str): Auxiliary basis for SPAHM(a). Defaults to defaults.auxbasis. - model (str): Atomic density model. Defaults to defaults.model. + model (str): Atomic density fitting model for SPAHM(a). Defaults to defaults.model. with_symbols (bool): Include atomic symbols with representations. Defaults to False. only_z (list, optional): Restrict to specific atom types. Defaults to None. merge (bool): Merge alpha/beta into single vector. Defaults to True. @@ -216,6 +214,7 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= return allvec + def main(args=None): """Command-line interface for computing SPAHM representations (atom or bond centered).
From 092baeac906dddad401d2e7f79614b5dbc167d5a Mon Sep 17 00:00:00 2001 From: Ksenia Date: Mon, 3 Nov 2025 18:13:54 +0100 Subject: [PATCH 08/23] Refactor moments --- examples/example_deco.py | 5 +- qstack/compound.py | 38 ++++++ qstack/equio.py | 22 +--- qstack/fields/decomposition.py | 56 +-------- qstack/fields/density2file.py | 4 +- qstack/fields/excited.py | 4 +- qstack/fields/moments.py | 189 +++++++++++++++++----------- qstack/mathutils/array.py | 60 +++++++++ qstack/mathutils/rotation_matrix.py | 6 +- qstack/reorder.py | 17 +++ qstack/spahm/rho/dmb_rep_bond.py | 2 +- qstack/spahm/rho/sym.py | 9 +- tests/test_fitting.py | 4 +- tests/test_moments.py | 43 +++++++ 14 files changed, 297 insertions(+), 162 deletions(-) create mode 100644 qstack/mathutils/array.py create mode 100644 tests/test_moments.py diff --git a/examples/example_deco.py b/examples/example_deco.py index 2f77d542..cbedb85e 100644 --- a/examples/example_deco.py +++ b/examples/example_deco.py @@ -1,6 +1,7 @@ import os import numpy as np from qstack import compound, fields +from qstack.fields import moments from qstack.fields.decomposition import decompose, correct_N, correct_N_atomic from qstack.fields.density2file import coeffs_to_cube, coeffs_to_molden @@ -10,7 +11,7 @@ auxmol, c = decompose(mol, dm, 'cc-pvqz jkfit') print("Expansion Coefficients:", c) -N = fields.decomposition.number_of_electrons_deco(auxmol, c) +N = moments.r2_c(auxmol, c, moments=[0])[0] print("Number of electrons after decomposition: ", N) @@ -21,7 +22,7 @@ print('density saved to H2O.molden') c = correct_N(auxmol, c) -N = fields.decomposition.number_of_electrons_deco(auxmol, c) +N = moments.r2_c(auxmol, c, moments=[0])[0] print(N) diff --git a/qstack/compound.py b/qstack/compound.py index f0446373..fa6249b6 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -4,6 +4,8 @@ import numpy as np from pyscf import gto, data from qstack import constants +from qstack.reorder import get_mrange +from qstack.mathutils.array 
import vstack_padding from qstack.mathutils.rotation_matrix import rotate_euler @@ -338,3 +340,39 @@ def singleatom_basis_enumerator(basis): cursor += msize*n_count return l_per_bas, n_per_bas, ao_starts + +def basis_flatten(mol): + """Flattens a basis set definition for AOs. + + Args: + mol (pyscf.gto.Mole): pyscf Mole object. + + Returns: + tuple: A tuple containing: + - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: + - 0: atom index + - 1: angular momentum quantum number l + - 2: magnetic quantum number m + - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: + - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian + - j: AO index + - k: radial function index (padded with zeros if necessary) + """ + + x = [] + y = np.zeros((3, mol.nao), dtype=int) + i = 0 + a = mol.bas_exps() + for iat in range(mol.natm): + for bas_id in mol.atom_shell_ids(iat): + l = mol.bas_angular(bas_id) + cs = mol.bas_ctr_coeff(bas_id) + msize = 2*l+1 + for c in cs.T: + ac = np.array([a[bas_id], c]) + x.extend([ac]*msize) + y[:2,i:i+msize] = np.array([[iat, l]]*msize).T + y[2,i:i+msize] = get_mrange(l) + i += msize + x = vstack_padding(x).transpose((1,0,2)) + return y, x diff --git a/qstack/equio.py b/qstack/equio.py index 19fd8715..9bccd1c7 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -4,6 +4,7 @@ from pyscf import data import metatensor import numbers +from qstack.reorder import get_mrange vector_label_names = SimpleNamespace( tm = ['spherical_harmonics_l', 'species_center'], @@ -24,21 +25,6 @@ _pyscf2gpr_l1_order = [1,2,0] -def _get_mrange(l): - """Get the m quantum number range for a given angular momentum l. - - For l=1, returns pyscf order: x,y,z which is (1,-1,0). - - Args: - l (int): Angular momentum quantum number. - - Returns: - tuple or range: Magnetic quantum numbers for the given l. 
- """ - if l==1: - return (1,-1,0) - else: - return range(-l,l+1) def _get_llist(q, mol): @@ -195,7 +181,7 @@ def tensormap_to_vector(mol, tensor): block = tensor.block(spherical_harmonics_l=l, species_center=q) id_samp = block.samples.position((iat,)) id_prop = block.properties.position((il[l],)) - for m in _get_mrange(l): + for m in get_mrange(l): id_comp = block.components[0].position((m,)) c[i] = block.values[id_samp,id_comp,id_prop] i += 1 @@ -359,7 +345,7 @@ def tensormap_to_matrix(mol, tensor): llist1 = _get_llist(q1, mol) il1 = dict.fromkeys(range(max(llist1) + 1), 0) for l1 in llist1: - for m1 in _get_mrange(l1): + for m1 in get_mrange(l1): i2 = 0 for iat2, q2 in enumerate(atom_charges): @@ -371,7 +357,7 @@ def tensormap_to_matrix(mol, tensor): id_samp = block.samples.position((iat1, iat2)) id_prop = block.properties.position((il1[l1], il2[l2])) - for m2 in _get_mrange(l2): + for m2 in get_mrange(l2): id_comp1 = block.components[0].position((m1,)) id_comp2 = block.components[1].position((m2,)) dm[i1, i2] = block.values[id_samp, id_comp1, id_comp2, id_prop] diff --git a/qstack/fields/decomposition.py b/qstack/fields/decomposition.py index 100481de..61a7ef2c 100644 --- a/qstack/fields/decomposition.py +++ b/qstack/fields/decomposition.py @@ -2,6 +2,7 @@ import scipy from pyscf import scf from qstack import compound +from . import moments def decompose(mol, dm, auxbasis): """Fit molecular density onto an atom-centered basis. @@ -144,7 +145,7 @@ def correct_N_atomic(mol, N, c0, metric='u'): numpy ndarray: Corrected decomposition coefficients (1D array). 
""" - Q = number_of_electrons_deco_vec(mol, per_atom=True) + Q = moments.r2_c(mol, None, moments=[0], per_atom=True)[0] N0 = c0 @ Q O1q = _get_inv_metric(mol, metric, Q) la = scipy.linalg.solve(Q.T @ O1q, N-N0) @@ -167,7 +168,7 @@ def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): """ mode = mode.lower() - q = number_of_electrons_deco_vec(mol) + q = moments.r2_c(mol, None, moments=[0]) N0 = c0 @ q if N is None: @@ -181,54 +182,3 @@ def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): la = (N - N0) / (q @ O1q) c = c0 + la * O1q return c - - -def number_of_electrons_deco_vec(mol, per_atom=False): - """Computes the electron number decomposition vector for basis functions. - - For s-functions (l=0), computes the integral of the basis function which - corresponds to its contribution to the electron count. - - Args: - mol (pyscf Mole): pyscf Mole object. - per_atom (bool): If True, returns a 2D array with per-atom contributions. - If False, returns a 1D array. Defaults to False. - - Returns: - numpy ndarray: If per_atom is False, 1D array of shape (nao,) with electron - contributions for each basis function. If per_atom is True, - 2D array of shape (nao, natm) with per-atom contributions. - """ - if per_atom: - Q = np.zeros((mol.nao,mol.natm)) - else: - Q = np.zeros(mol.nao) - i = 0 - for iat in range(mol.natm): - for bas_id in mol.atom_shell_ids(iat): - l = mol.bas_angular(bas_id) - n = mol.bas_nctr(bas_id) - if l==0: - w = mol.bas_ctr_coeff(bas_id) - a = mol.bas_exp(bas_id) - q = np.pow(2.0*np.pi/a, 0.75) @ w - if per_atom: - Q[i:i+n,iat] = q - else: - Q[i:i+n] = q - i += (2*l+1)*n - return Q - -def number_of_electrons_deco(auxmol, c): - """Computes the number of electrons of a molecule given a set of expansion coefficients and a Mole object. - - Args: - auxmol (pyscf Mole): pyscf mol object holding molecular structure, composition and the auxiliary basis set. - c (numpy ndarray): expansion coefficients of the density onto the auxiliary basis. 
- - Returns: - The number of electrons as an integer value. - """ - - q = number_of_electrons_deco_vec(auxmol) - return q @ c diff --git a/qstack/fields/density2file.py b/qstack/fields/density2file.py index e3376c84..b695f9a2 100644 --- a/qstack/fields/density2file.py +++ b/qstack/fields/density2file.py @@ -2,7 +2,7 @@ from pyscf.dft.numint import eval_ao from pyscf.tools.cubegen import Cube import pyscf.tools.molden -from .decomposition import number_of_electrons_deco +from . import moments def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, margin=3.0): """Saves the electron density to a cube file. @@ -42,6 +42,6 @@ def coeffs_to_molden(mol, coeffs, moldenname): """ with open(moldenname, 'w') as f: - N = number_of_electrons_deco(mol, coeffs) + N = moments.r2_c(mol, coeffs, moments=[0])[0] pyscf.tools.molden.header(mol, f, True) pyscf.tools.molden.orbital_coeff(mol, f, np.array([coeffs]).T, ene=[0.0], occ=[N], ignore_h=True) diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index 98e99068..67924332 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -79,8 +79,8 @@ def exciton_properties_c(mol, hole, part): Three floats: the hole-particle distance, the hole size, and the particle size respectively. """ - _hole_N, hole_r, hole_r2 = moments.r2_c(hole, mol) - _part_N, part_r, part_r2 = moments.r2_c(part, mol) + _hole_N, hole_r, hole_r2 = moments.r2_c(mol, hole) + _part_N, part_r, part_r2 = moments.r2_c(mol, part) dist = np.linalg.norm(hole_r-part_r) hole_extent = np.sqrt(hole_r2-hole_r@hole_r) diff --git a/qstack/fields/moments.py b/qstack/fields/moments.py index c8d18d1e..77bb739c 100644 --- a/qstack/fields/moments.py +++ b/qstack/fields/moments.py @@ -1,72 +1,46 @@ import numpy as np +from qstack.compound import basis_flatten +from qstack.mathutils.array import safe_divide, scatter + def first(mol, rho): - """ Computes the transition dipole moments. 
+ r"""Wrapper to compute the first moment of a molecular density needed for dipole moments. + + $$\int r \rho(r) dr$$ Args: mol (pyscf Mole): pyscf Mole object. - rho (numpy ndarray): Density Matrix (trnasition if given ) or fitting coefficnts for the same matrix. + rho (numpy ndarray): 2D (mol.nao×mol.nao) density matrix or 1D (mol.nao) fitting coefficients. Returns: - A numpy ndarray with the transition dipole moments. If rho is a 1D matrix, returns the Decomposed/predicted transition dipole moments; if rho is a 2D matrix, returns the ab initio transition dipole moments. + numpy ndarray: Electronic dipole moment vector (3 components). """ if rho.ndim==1: - return r_c(mol, rho) #coefficient + return r2_c(mol, rho, moments=(1,))[0] elif rho.ndim==2: - return r_dm(mol, rho) #matrix + return r_dm(mol, rho) else: - raise RuntimeError('Dimension mismatch') + raise RuntimeError(f'Dimension mismatch {rho.shape}') def r_dm(mol, dm): - """Computes the electric dipole moment from a density matrix. + """Computes the first moment of a density matrix. Args: mol (pyscf Mole): pyscf Mole object. dm (numpy ndarray): 2D density matrix in AO basis. Returns: - numpy ndarray: Electric dipole moment vector (3 components). + numpy ndarray: Electronic dipole moment vector (3 components). """ with mol.with_common_orig((0,0,0)): ao_dip = mol.intor_symmetric('int1e_r', comp=3) el_dip = np.einsum('xij,ji->x', ao_dip, dm) return el_dip -def r_c(mol, rho): - """Computes the electric dipole moment from fitting coefficients. - - Args: - mol (pyscf Mole): pyscf Mole object. - rho (numpy ndarray): 1D array of density-fitting coefficients. - Returns: - numpy ndarray: Electric dipole moment vector (3 components). - - Note: - Currently only supports contracted basis sets. 
- """ - r = np.zeros(3) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - coord = mol.atom_coords()[iat] - for gto in mol._basis[q]: - l, [a, c] = gto - if(l==0): - I0 = c * (2.0*np.pi/a)**0.75 - r += I0 * rho[i] * coord - i+=1 - elif(l==1): - I1 = c * (2.0*np.pi)**0.75 / (a**1.25) - r += I1 * rho[i:i+3] - i+=3 - else: - i+=2*l+1 - return r - -def r2_c(rho, mol): - """Compute the zeroth ( :math:`<1>` ), first ( :math:`<r>` ), and second ( :math:`<r^{2}>`) moments of electron density distribution. +def r2_c(mol, rho, moments=(0,1,2), per_atom=False): + """Compute the zeroth ( :math:`<1>` ), first ( :math:`<r>` ), and second ( :math:`<r^{2}>`) moments of a fitted density. .. math:: @@ -81,42 +55,111 @@ def r2_c(rho, mol): <r^{2}> = \\int \\hat{r}^{2} \\rho d r Args: - rho (numpy ndarray): 1D array of density-fitting coefficients. mol (pyscf Mole): pyscf Mole object. + rho (numpy ndarray): 1D array of density-fitting coefficients. Can be None to compute AO integrals instead. + moments (tuple): Moments to compute (0, 1, and/or 2). Returns: - tuple: Three values (N, r, r2) representing: - - N (float): Zeroth moment (integrated density). - - r (numpy ndarray): First moment (3-component dipole vector). - - r2 (float): Second moment (mean square radius). + tuple: If rho!=None, values representing the requested moments, possibly containing: + - float: Zeroth moment (integrated density). + - numpy ndarray: First moment (3-component dipole vector). + - float: Second moment (mean square radius). + If rho is None, arrays representing the requested moments in AO basis so that + they can be contracted with the coefficients using (returned array)@(rho).
+ + if rho is None and per_atom is True: + 0th moment: (mol.nao, mol.natm) + 1st moment: (3, mol.nao, mol.natm) + 2nd moment: (mol.nao, mol.natm) + + if rho is None and per_atom is False: + 0th moment: (mol.nao,) + 1st moment: (3, mol.nao) + 2nd moment: (mol.nao,) + + if rho is not None and per_atom is True: + 0th moment: (mol.natm,) + 1st moment: (3, mol.natm) + 2nd moment: (mol.natm,) + - Note: - Currently only supports contracted basis sets. """ - N = 0.0 # <1> zeroth - r = np.zeros(3) # first - r2 = 0.0 # second moments electron density distribution - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - coord = mol.atom_coords()[iat] - for gto in mol._basis[q]: - l, [a, c] = gto - if(l==0): - I0 = c * (2.0*np.pi/a)**0.75 - I2 = c * 3.0 * (np.pi**0.75) / (a**1.75 * 2.0**0.25) - N += I0 * rho[i] - r += I0 * rho[i] * coord - r2 += I0 * rho[i] * (coord@coord) - r2 += I2 * rho[i] - i+=1 - elif(l==1): - I1 = c * (2.0*np.pi)**0.75 / (a**1.25) - temp = I1 * rho[i:i+3] - r += temp - r2 += 2.0*(temp@coord) - i+=3 + if max(moments)>2: + raise RuntimeError('Only moments 0, 1, and 2 are supported.') + ret = {} + + (iat, l, _), (a, c) = basis_flatten(mol) + coords = mol.atom_coords()[iat] + + idx_l0 = np.where(l==0)[0] + ta = safe_divide(2.0*np.pi, a[idx_l0])**0.75 + I0 = (c[idx_l0] * ta).sum(axis=1) + if rho is None: + if 0 in moments: + moments_ao = np.zeros(mol.nao) + moments_ao[idx_l0] = I0 + if per_atom: + ret[0] = scatter(moments_ao, iat) + else: + ret[0] = moments_ao + + else: + t0 = rho[idx_l0] * I0 + if 0 in moments: + if per_atom: + ret[0] = np.zeros(mol.natm) + np.add.at(ret[0], iat[idx_l0], t0) else: - i+=2*l+1 - return N, r, r2 + ret[0] = t0.sum() + + if 1 in moments or 2 in moments: + idx_l1 = np.where(l==1)[0] + I1 = (c[idx_l1] * safe_divide((2.0*np.pi)**0.75, a[idx_l1]**1.25)).sum(axis=1) + mask = np.tile([[1,0,0,0,1,0,0,0,1]], len(I1)//3).reshape(-1,3).T + I1 = I1*mask + if rho is not None: + t1 = (rho[idx_l1]*I1).T + + if 1 in moments: + if rho is
None: + moments_ao = np.zeros((3, mol.nao)) + moments_ao[:,idx_l0] = I0 * coords[idx_l0].T + moments_ao[:,idx_l1] = I1 + moments_ao = moments_ao + if per_atom: + ret[1] = scatter(moments_ao, iat) + else: + ret[1] = moments_ao + else: + if per_atom: + ret[1] = np.zeros((3, mol.natm)) + np.add.at(ret[1], iat[idx_l0], coords[idx_l0] * t0[:,None]) + np.add.at(ret[1], iat[idx_l1], t1) + else: + ret[1] = (t0 * coords[idx_l0].T).sum(axis=1) \ + + t1.sum(axis=0) + + if 2 in moments: + I2 = (c[idx_l0] * ta * safe_divide(1.5, a[idx_l0])).sum(axis=1) + if rho is None: + moments_ao = np.zeros(mol.nao) + moments_ao[idx_l0] = I2 + (I0 * (coords[idx_l0]**2).sum(axis=1)) + moments_ao[idx_l1] = 2.0 * (I1.T * coords[idx_l1]).sum(axis=1) + if per_atom: + ret[2] = scatter(moments_ao, iat) + else: + ret[2] = moments_ao + + else: + if per_atom: + ret[2] = np.zeros(mol.natm) + np.add.at(ret[2], iat[idx_l0], t0 * (coords[idx_l0]**2).sum(axis=1)) + np.add.at(ret[2], iat[idx_l0], rho[idx_l0] * I2) + np.add.at(ret[2], iat[idx_l1], 2.0 * (t1 * coords[idx_l1]).sum(axis=1)) + else: + ret[2] = t0 @ (coords[idx_l0]**2).sum(axis=1) \ + + rho[idx_l0] @ I2 \ + + 2.0 * (t1 * coords[idx_l1]).sum() + + return tuple(ret[i] for i in moments) diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py new file mode 100644 index 00000000..b593e261 --- /dev/null +++ b/qstack/mathutils/array.py @@ -0,0 +1,60 @@ +import numpy as np + + +def scatter(values, indices): + """Scatter values into a new array based on provided indices. + + Does the same as + ``` + for i, j in enumerate(indices): + x[...,i,j] = values[...,i] + ``` + + Args: + values (numpy.ndarray): Array of values to be scattered of shape (..., N). + indices (numpy.ndarray): Array of indices indicating where to scatter the values of shape (N,). + + Returns: + numpy.ndarray: New array with scattered values of shape (..., N, max(indices)+1). 
+ + + """ + x = np.zeros((*values.shape, max(indices)+1)) + x[...,np.arange(len(indices)),indices] = values + return x + + +def safe_divide(a, b): + """Wrapper for numpy divide that avoids division by zero. + + Args: + a (numpy.ndarray): Numerator array. + b (numpy.ndarray): Denominator array. + + Returns: + numpy.ndarray: Result of element-wise division of a by b, with zeros where b is zero. + """ + + return np.divide(a, b, out=np.zeros_like(b), where=b!=0) + + +def vstack_padding(xs): + """Vertically stack arrays with different shapes by padding smaller arrays with zeros. + + Args: + xs (list): List of numpy arrays to be stacked. + + Returns: + numpy.ndarray : A stacked array with shape (len(xs), *max_shape). + + Raises: + ValueError: If input arrays have different number of dimensions. + """ + if len({x.ndim for x in xs}) > 1: + raise ValueError("All input arrays must have the same number of dimensions.") + max_size = max(x.shape for x in xs) + X = np.zeros((len(xs), *max_size)) + for i, x in enumerate(xs): + slices = tuple(np.s_[0:s] for s in x.shape) + X[i][slices] = x + return X diff --git a/qstack/mathutils/rotation_matrix.py b/qstack/mathutils/rotation_matrix.py index 49176167..54b504b0 100644 --- a/qstack/mathutils/rotation_matrix.py +++ b/qstack/mathutils/rotation_matrix.py @@ -14,7 +14,7 @@ def _Rz(a): return np.array([ [ca, -sa, 0], [sa, ca, 0], - [0, 0, 1] + [0, 0, 1], ]) @@ -31,7 +31,7 @@ def _Ry(b): return np.array([ [ cb, 0, sb], [ 0, 1, 0 ], - [-sb, 0, cb] + [-sb, 0, cb], ]) @@ -48,7 +48,7 @@ def _Rx(g): return np.array([ [1, 0, 0 ], [0, cg, -sg], - [0, sg, cg] + [0, sg, cg], ]) diff --git a/qstack/reorder.py b/qstack/reorder.py index c052166e..151313ff 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -1,6 +1,23 @@ import numpy as np +def get_mrange(l): + """Get the m quantum number range for a given angular momentum l. + + For l=1, returns pyscf order: x,y,z which is (1,-1,0). + + Args: + l (int): Angular momentum quantum number. 
+ + Returns: + tuple or range: Magnetic quantum numbers for the given l. + """ + if l==1: + return (1,-1,0) + else: + return range(-l,l+1) + + def _orca2gpr_idx(mol): """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index 1ec74104..9cce9be4 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -223,7 +223,7 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): Returns: tuple: ([v0, v1], bname) where: - v0: Representation from atom i0's perspective - - v1: Representation from atom i1's perspective + - v1: Representation from atom i1's perspective - bname: Bond name (e.g., 'CH') Returns (None, None) if distance exceeds cutoff. """ diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index 7c570483..12ce739c 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -1,6 +1,7 @@ import numpy as np from qstack import compound from qstack.mathutils.matrix import sqrtm +from qstack.reorder import get_mrange def idxl0(i, l, ao): @@ -44,12 +45,8 @@ def get_S(q, basis): ao = {'l': [], 'm': []} for l in l_per_bas: - msize = 2*l+1 - ao['l'].extend([l]*msize) - if l != 1: - ao['m'].extend(np.arange(msize)-l) - else: - ao['m'].extend([1, -1, 0]) # x, y, z + ao['l'].extend([l]*(2*l+1)) + ao['m'].extend(get_mrange(l)) return S, ao, ao_start diff --git a/tests/test_fitting.py b/tests/test_fitting.py index 10443eff..c908d60c 100755 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -3,7 +3,7 @@ import os import numpy as np from qstack import compound -from qstack.fields import decomposition +from qstack.fields import decomposition, moments def test_fitting(): @@ -49,7 +49,7 @@ def test_fitting_noe(): path = os.path.dirname(os.path.realpath(__file__)) auxmol = compound.xyz_to_mol(path+'/data/H2O_dist.xyz', 'cc-pvdz jkfit', charge=0, spin=0) c = 
np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') - N = decomposition.number_of_electrons_deco(auxmol, c) + N = moments.r2_c(auxmol, c, moments=[0])[0] N0 = 10.000199558313856 assert np.allclose(N,N0) diff --git a/tests/test_moments.py b/tests/test_moments.py new file mode 100644 index 00000000..e72f3db1 --- /dev/null +++ b/tests/test_moments.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import os +import numpy as np +from qstack import compound +from qstack.fields import decomposition, moments + + +def test_moments(): + path = os.path.dirname(os.path.realpath(__file__)) + mol = compound.xyz_to_mol(path+'/data/H2O_dist.xyz', 'cc-pvdz', charge=0, spin=0) + dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') + c = decomposition.decompose(mol, dm, 'cc-pvdz')[1] + + R0 = 9.930396060748974 + R0_atom = [5.6426496, 1.88412837, 2.4036181 ] + R1 = [ 1.53224245e-01, 1.70535989e-01, -8.51874261e-16] + R2 = 12.352661975356678 + + r0, r1, r2 = moments.r2_c(mol, c) + assert(np.allclose(r0, R0)) + assert(np.allclose(r1, R1)) + assert(np.allclose(r2, R2)) + + I0, I1, I2 = moments.r2_c(mol, None) + assert(np.allclose(r0, I0@c)) + assert(np.allclose(r1, I1@c)) + assert(np.allclose(r2, I2@c)) + + I0, I1, I2 = moments.r2_c(mol, None, per_atom=True) + r0_atom = c @ I0 + assert(np.allclose(r0_atom, R0_atom)) + r1_atom = np.einsum('p,xpa->ax', c, I1) # (atom, component) + assert(np.allclose(r1_atom.sum(axis=0), R1)) + + r0_atom, r1_atom, r2_atom = moments.r2_c(mol, c, per_atom=True) + assert(np.allclose(r0_atom, R0_atom)) + assert(np.allclose(r1_atom.sum(axis=0), R1)) + assert(np.allclose(r2_atom.sum(), R2)) + + +if __name__ == '__main__': + test_moments() From bc530788394f61dd678823e37381dc98275b5bcd Mon Sep 17 00:00:00 2001 From: Ksenia Date: Mon, 3 Nov 2025 23:50:30 +0100 Subject: [PATCH 09/23] Refactor other basis loops --- qstack/basis_opt/opt.py | 20 ++++---- qstack/compound.py | 37 ++++++++------ qstack/equio.py | 39 ++++++--------- qstack/reorder.py | 106 
+++++++++++----------------------------- 4 files changed, 74 insertions(+), 128 deletions(-) diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index 2b9b46eb..2e097dc0 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -4,6 +4,7 @@ import scipy.optimize from pyscf import gto import pyscf.data +from ..compound import basis_flatten from . import basis_tools as qbbt @@ -159,16 +160,15 @@ def make_moldata(fname): self = np.einsum('p,p,p->', weights, rho, rho) mol = gto.M(atom=str(molecule), basis=basis) - idx = [] - centers = [] - for iat in range(mol.natm): - q = mol._atom[iat][0] - ib0 = bf_bounds[q][0] - for ib, b in enumerate(mol._basis[q]): - l = b[0] - idx += [ib+ib0] * (2*l+1) - centers += [iat] * (2*l+1) - idx = np.array(idx) + centers, l, _ = basis_flatten(mol, return_both=False) + idx = np.zeros_like(centers) + i = 0 + while i < mol.nao: + q = mol.atom_symbol(centers[i]) + for ib in range(*bf_bounds[q]): + msize = 2*l[i]+1 + idx[i:i+msize] = [ib] * msize + i += msize distances = np.zeros((mol.natm, len(rho))) for iat in range(mol.natm): diff --git a/qstack/compound.py b/qstack/compound.py index fa6249b6..d061680d 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -341,24 +341,24 @@ def singleatom_basis_enumerator(basis): return l_per_bas, n_per_bas, ao_starts -def basis_flatten(mol): +def basis_flatten(mol, return_both=True): """Flattens a basis set definition for AOs. Args: mol (pyscf.gto.Mole): pyscf Mole object. + return_both (bool): Whether to return both AO info and primitive Gaussian info. Defaults to True. 
Returns: - tuple: A tuple containing: - - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: + - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: - 0: atom index - 1: angular momentum quantum number l - 2: magnetic quantum number m - - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: - - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian - - j: AO index - - k: radial function index (padded with zeros if necessary) + If return_both is True, also returns: + - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: + - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian + - j: AO index + - k: radial function index (padded with zeros if necessary) """ - x = [] y = np.zeros((3, mol.nao), dtype=int) i = 0 @@ -366,13 +366,18 @@ def basis_flatten(mol): for iat in range(mol.natm): for bas_id in mol.atom_shell_ids(iat): l = mol.bas_angular(bas_id) + n = mol.bas_nctr(bas_id) cs = mol.bas_ctr_coeff(bas_id) msize = 2*l+1 - for c in cs.T: - ac = np.array([a[bas_id], c]) - x.extend([ac]*msize) - y[:2,i:i+msize] = np.array([[iat, l]]*msize).T - y[2,i:i+msize] = get_mrange(l) - i += msize - x = vstack_padding(x).transpose((1,0,2)) - return y, x + if return_both: + for c in cs.T: + ac = np.array([a[bas_id], c]) + x.extend([ac]*msize) + y[:2,i:i+msize*n] = np.array([[iat, l]]*msize*n).T + y[2,i:i+msize*n] = [*get_mrange(l)]*n + i += msize*n + if return_both: + x = vstack_padding(x).transpose((1,0,2)) + return y, x + else: + return y diff --git a/qstack/equio.py b/qstack/equio.py index 9bccd1c7..a45d7533 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -3,8 +3,8 @@ from types import SimpleNamespace from pyscf import data import metatensor -import numbers from qstack.reorder import get_mrange +from qstack.compound import singleatom_basis_enumerator vector_label_names = SimpleNamespace( tm = ['spherical_harmonics_l', 
'species_center'], @@ -25,30 +25,16 @@ _pyscf2gpr_l1_order = [1,2,0] - - -def _get_llist(q, mol): - """Get list of angular momentum quantum numbers for basis functions of an element. +def _get_llist(mol): + """Get list of angular momentum quantum numbers for basis functions of each element of a molecule. Args: - q (int or str): Atomic number or element symbol. mol (pyscf.gto.Mole): pyscf Mole object. Returns: - list: List of angular momentum quantum numbers for each basis function. + dict: Dictionary with atom numbers as keys and List of angular momentum quantum numbers for each basis function as values. """ - - # TODO other basis formats? -# for bas_id in mol.atom_shell_ids(iat): -# l = mol.bas_angular(bas_id) -# nc = mol.bas_nctr(bas_id) -# for n in range(nc): - if isinstance(q, numbers.Integral): - q = data.elements.ELEMENTS[q] - llist = [] - for l, *prim in mol._basis[q]: - llist.extend([l]*(len(prim[0])-1)) - return llist + return {int(q): singleatom_basis_enumerator(mol._basis[data.elements.ELEMENTS[q]])[0] for q in np.unique(mol.atom_charges())} def _get_tsize(tensor): @@ -99,9 +85,10 @@ def vector_to_tensormap(mol, c): # Create labels for TensorMap, lables for blocks, and empty blocks + llists = _get_llist(mol) + for q in elements: - llist = _get_llist(q, mol) - llists[q] = llist + llist = llists[q] for l in sorted(set(llist)): label = (l, q) tm_label_vals.append(label) @@ -173,9 +160,10 @@ def tensormap_to_vector(mol, tensor): c = np.zeros(mol.nao) atom_charges = mol.atom_charges() + llists = _get_llist(mol) i = 0 for iat, q in enumerate(atom_charges): - llist = _get_llist(q, mol) + llist = llists[q] il = dict.fromkeys(range(max(llist) + 1), 0) for l in llist: block = tensor.block(spherical_harmonics_l=l, species_center=q) @@ -221,7 +209,7 @@ def pairs(list1, list2): block_comp_label_vals = {} blocks = {} - llists = {q: _get_llist(q, mol) for q in elements} + llists = _get_llist(mol) # Create labels for TensorMap, lables for blocks, and empty blocks @@ 
-340,16 +328,17 @@ def tensormap_to_matrix(mol, tensor): dm = np.zeros((mol.nao, mol.nao)) atom_charges = mol.atom_charges() + llists = _get_llist(mol) i1 = 0 for iat1, q1 in enumerate(atom_charges): - llist1 = _get_llist(q1, mol) + llist1 = llists[q1] il1 = dict.fromkeys(range(max(llist1) + 1), 0) for l1 in llist1: for m1 in get_mrange(l1): i2 = 0 for iat2, q2 in enumerate(atom_charges): - llist2 = _get_llist(q2, mol) + llist2 = llists[q2] il2 = dict.fromkeys(range(max(llist2) + 1), 0) for l2 in llist2: diff --git a/qstack/reorder.py b/qstack/reorder.py index 151313ff..095ef05d 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -18,7 +18,7 @@ def get_mrange(l): return range(-l,l+1) -def _orca2gpr_idx(mol): +def _orca2gpr_idx(l, m): """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. Args: @@ -27,54 +27,20 @@ def _orca2gpr_idx(mol): Returns: numpy.ndarray: Re-arranged indices array. """ - #def _M1(n): - # return (n+1)//2 if n%2 else -((n+1)//2) - idx = np.arange(mol.nao, dtype=int) + idx = np.arange(len(l)) i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - for _n in range(nf): - #for m in range(-l, l+1): - # m1 = _M1(m+l) - # idx[(i+(m1-m))] = i - # i+=1 - I = np.s_[i:i+msize] - idx[I] = np.concatenate((idx[I][::-2], idx[I][1::2])) - i += msize - return idx - - -def _orca2gpr_sign(mol): - """Given a molecule returns a list of multipliers needed to tranform from orca AO. - - Args: - mol (pyscf.gto.Mole): pyscf Mole object. - - Returns: - numpy.ndarray: Array of +1/-1 multipliers. 
- """ - signs = np.ones(mol.nao, dtype=int) - i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - if l<3: - i += msize*nf - else: - for _n in range(nf): - signs[i+5:i+msize] = -1 # |m| >= 3 - i+= msize - return signs - - -def _pyscf2gpr_idx(mol): + while(i < len(idx)): + msize = 2*l[i]+1 + j = np.s_[i:i+msize] + idx[j] = np.concatenate((idx[j][::-2], idx[j][1::2])) + i += msize + signs = np.ones_like(idx) + signs[np.where(np.abs(m)>=3)] = -1 # in pyscf order + signs[idx] = signs # in orca order + return idx, signs + + +def _pyscf2gpr_idx(l): """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. Args: @@ -84,21 +50,14 @@ def _pyscf2gpr_idx(mol): numpy.ndarray: Re-arranged indices array. """ - idx = np.arange(mol.nao, dtype=int) + idx = np.arange(len(l)) i=0 - for iat in range(mol.natm): - q = mol._atom[iat][0] - for gto in mol._basis[q]: - l = gto[0] - msize = 2*l+1 - nf = max([len(prim)-1 for prim in gto[1:]]) - if l==1: - for _n in range(nf): - idx[i:i+3] = [i+1,i+2,i] - i += 3 - else: - i += msize * nf - return idx + while(i < len(idx)): + msize = 2*l[i]+1 + if l[i]==1: + idx[i:i+3] = [i+1,i+2,i] + i += msize + return idx, np.ones_like(idx) def reorder_ao(mol, vector, src='pyscf', dest='gpr'): @@ -121,29 +80,22 @@ def reorder_ao(mol, vector, src='pyscf', dest='gpr'): ValueError: If vector dimension is not 1 or 2. 
""" - def get_idx(mol, convention): + def get_idx(l, m, convention): convention = convention.lower() if convention == 'gpr': - return np.arange(mol.nao) + return np.arange(len(l)), np.ones_like(l) elif convention == 'pyscf': - return _pyscf2gpr_idx(mol) + return _pyscf2gpr_idx(l) elif convention == 'orca': - return _orca2gpr_idx(mol) + return _orca2gpr_idx(l, m) else: errstr = f'Conversion to/from the {convention} convention is not implemented' raise NotImplementedError(errstr) - def get_sign(mol, convention): - convention = convention.lower() - if convention in ['gpr', 'pyscf']: - return np.ones(mol.nao, dtype=int) - elif convention == 'orca': - return _orca2gpr_sign(mol) - - idx_src = get_idx(mol, src) - idx_dest = get_idx(mol, dest) - sign_src = get_sign(mol, src) - sign_dest = get_sign(mol, dest) + from .compound import basis_flatten + _, l, m = basis_flatten(mol, return_both=False) + idx_src, sign_src = get_idx(l, m, src) + idx_dest, sign_dest = get_idx(l, m, dest) if vector.ndim == 2: sign_src = np.einsum('i,j->ij', sign_src, sign_src) From 19b3e117d77f69e85384670d30a511b78f9fd7ba Mon Sep 17 00:00:00 2001 From: Ksenia Date: Tue, 4 Nov 2025 16:08:45 +0100 Subject: [PATCH 10/23] More docs --- qstack/fields/hirshfeld.py | 30 ++++++++++++------------------ qstack/reorder.py | 26 ++++++++++++++++++++------ 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/qstack/fields/hirshfeld.py b/qstack/fields/hirshfeld.py index cde98ca0..23c28081 100644 --- a/qstack/fields/hirshfeld.py +++ b/qstack/fields/hirshfeld.py @@ -23,7 +23,8 @@ def spherical_atoms(elements, atm_bas): dm_atoms[q] = pyscf.scf.hf.init_guess_by_atom(mol_atm) return dm_atoms -def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): + +def _hirshfeld_weights(mol, grid_coord, atm_dm, atm_bas, dominant): """Computes Hirshfeld partitioning weights for each atom at grid points. 
Hirshfeld partitioning divides the molecular density among atoms based on @@ -31,7 +32,7 @@ def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): each grid point exclusively to the atom with the highest weight. Args: - mol_full (pyscf Mole): Complete molecular pyscf Mole object. + mol (pyscf Mole): molecular pyscf Mole object. grid_coord (numpy ndarray): 2D array (ngrids, 3) of grid point coordinates in Bohr. atm_dm (dict): Dictionary mapping element symbols to atomic density matrices from `spherical_atoms`. atm_bas (str or dict): Basis set name or dictionary used for atomic density matrices. @@ -43,10 +44,10 @@ def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): # promolecular density grid_n = len(grid_coord) - rho_atm = np.zeros((mol_full.natm, grid_n), dtype=float) - for i in range(mol_full.natm): - q = mol_full._atom[i][0] - mol_atm = pyscf.gto.M(atom=mol_full._atom[i:i+1], basis=atm_bas, spin=pyscf.data.elements.ELEMENTS_PROTON[q]%2, unit='Bohr') + rho_atm = np.zeros((mol.natm, grid_n), dtype=float) + for i in range(mol.natm): + q = mol._atom[i][0] + mol_atm = pyscf.gto.M(atom=mol._atom[i:i+1], basis=atm_bas, spin=pyscf.data.elements.ELEMENTS_PROTON[q]%2, unit='Bohr') ao_atm = pyscf.dft.numint.eval_ao(mol_atm, grid_coord) rho_atm[i] = pyscf.dft.numint.eval_rho(mol_atm, ao_atm, atm_dm[q]) @@ -54,14 +55,13 @@ def _hirshfeld_weights(mol_full, grid_coord, atm_dm, atm_bas, dominant): rho = rho_atm.sum(axis=0) idx = np.where(rho > 0)[0] h_weights = np.zeros_like(rho_atm) - for i in range(mol_full.natm): + for i in range(mol.natm): h_weights[i,idx] = rho_atm[i,idx] /rho[idx] if dominant: - # get dominant hirshfeld weights for point in range(grid_n): i = np.argmax(h_weights[:,point]) - h_weights[:,point] = np.zeros(mol_full.natm) + h_weights[:,point] = np.zeros(mol.natm) h_weights[i,point] = 1.0 return h_weights @@ -73,7 +73,7 @@ def hirshfeld_charges(mol, cd, dm_atoms=None, atm_bas=None, Partitions the molecular electron 
density among atoms using Hirshfeld weights based on free atom densities. Can work with either density-fitting coefficients - or full density matrices, and supports both standard and dominant partitioning. + or density matrices, and supports both standard and dominant partitioning. Args: mol (pyscf Mole): pyscf Mole object for the molecule. @@ -105,10 +105,7 @@ def atom_contributions(cd, ao, tot_weights): return np.einsum('x,ax->a', tmp, tot_weights) # check input - if type(cd) is list: - cd_list = cd - else: - cd_list = [cd] + cd_list = cd if type(cd) is list else [cd] # spherical atoms if atm_bas is None: @@ -129,7 +126,4 @@ def atom_contributions(cd, ao, tot_weights): if not occupations: charges_list = [mol.atom_charges()-charges for charges in charges_list] - if type(cd) is list: - return charges_list - else: - return charges_list[0] + return charges_list if type(cd) is list else charges_list[0] diff --git a/qstack/reorder.py b/qstack/reorder.py index 095ef05d..7290e41a 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -5,6 +5,7 @@ def get_mrange(l): """Get the m quantum number range for a given angular momentum l. For l=1, returns pyscf order: x,y,z which is (1,-1,0). + For other l, returns the standard range from -l to +l. Args: l (int): Angular momentum quantum number. @@ -19,13 +20,20 @@ def get_mrange(l): def _orca2gpr_idx(l, m): - """Given a molecule returns a list of reordered indices to tranform orca AO ordering into SA-GPR. + """Given a molecule returns a list of reordered indices to tranform Orca AO ordering into SA-GPR. + + In Orca, orbital ordering corresponds to: + m=0, +1, +2, ..., l, -1, -2, ..., -l + while in SA-GPR it is: + m=-l, -l+1, ..., -1, 0, +1, ..., l-1, l + Additionally, Orca uses a different sign convention for |m|>=3. Args: - mol (pyscf.gto.Mole): pyscf Mole object. + l (np.ndarray): Array of angular momentum quantum numbers. + m (np.ndarray): Array of magnetic quantum numbers. 
Returns: - numpy.ndarray: Re-arranged indices array. + tuple: Re-arranged indices array and sign array. """ idx = np.arange(len(l)) i=0 @@ -43,11 +51,17 @@ def _orca2gpr_idx(l, m): def _pyscf2gpr_idx(l): """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. + In SA-GPR, orbital ordering corresponds to: + m=-l, -l+1, ..., -1, 0, +1, ..., l-1, l + In PySCF, it is the same except for p-orbitals which are ordered as: + m=+1, -1, 0 (i.e., x,y,z). + Signs are the same in both conventions, so they are returned for compatibility. + Args: - mol (pyscf.gto.Mole): pyscf Mole object. + l (np.ndarray): Array of angular momentum quantum numbers. Returns: - numpy.ndarray: Re-arranged indices array. + tuple: Re-arranged indices array and sign array. """ idx = np.arange(len(l)) @@ -68,7 +82,7 @@ def reorder_ao(mol, vector, src='pyscf', dest='gpr'): Args: mol (pyscf.gto.Mole): pyscf Mole object. - vector (numpy.ndarray): Vector or matrix to reorder. + vector (numpy.ndarray): Vector (nao,) or matrix (mol.nao,mol.nao) to reorder. src (str): Current convention. Defaults to 'pyscf'. dest (str): Convention to convert to (available: 'pyscf', 'gpr', 'orca'). Defaults to 'gpr'. From 98c916408600572095184d8dfcd9303c429c6636 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Wed, 5 Nov 2025 01:40:09 +0100 Subject: [PATCH 11/23] More docs --- qstack/mathutils/fps.py | 15 +- qstack/spahm/compute_spahm.py | 52 +-- qstack/spahm/rho/Dmatrix.py | 576 +++++++++++++------------- qstack/spahm/rho/compute_rho_spahm.py | 5 + 4 files changed, 318 insertions(+), 330 deletions(-) diff --git a/qstack/mathutils/fps.py b/qstack/mathutils/fps.py index b3cccaab..9655f5fa 100644 --- a/qstack/mathutils/fps.py +++ b/qstack/mathutils/fps.py @@ -4,9 +4,18 @@ def do_fps(x, d=0): """Perform Farthest Point Sampling on a set of points. - Dral P O, Owens A, Yurchenko S N and Thiel W 2017 J. Chem. Phys. 
146 244108 doi:10.1063/1.4989536 - Imbalzano G, Anelli A, Giofré D, Klees S, Behler J and Ceriotti M 2018 J. Chem. Phys. 148 241730 doi:10.1063/1.5024611 - Rossi K, Jurásková V, Wischert R, Garel L, Corminboeuf C and Ceriotti M 2020 J. Chem. Theory Comput. 16 5139–49 doi:10.1021/acs.jctc.0c00362 + References: + P. O. Dral, A. Owens, S. N. Yurchenko, W. Thiel, + "Structure-based sampling and self-correcting machine learning for accurate calculations of potential energy surfaces and vibrational levels", + J. Chem. Phys. 146 244108 (2017), doi:10.1063/1.4989536. + + G. Imbalzano, A. Anelli, D. Giofré, S. Klees, J. Behler, M. Ceriotti + "Automatic selection of atomic fingerprints and reference configurations for machine-learning potentials", + J. Chem. Phys. 148 241730 (2018), doi:10.1063/1.5024611. + + K. Rossi, V. Jurásková, R. Wischert, L. Garel, C. Corminboeuf, M. Ceriotti + "Simulating solvation and acidity in complex mixtures with first-principles accuracy: the case of CH3SO3H and H2O2 in phenol", + J. Chem. Theory Comput. 16 5139–5149 (2020), doi:10.1021/acs.jctc.0c00362. Code from Giulio Imbalzano. diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 2576e46a..469c6963 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -4,10 +4,7 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): - """Computes molecular orbitals from a guess Hamiltonian with optional electric field. - - Solves the generalized eigenvalue problem for the guess Hamiltonian, optionally - including an external uniform electric field perturbation. + """Computes MO energies and vectors using an initial guess Hamiltonian. Args: mol (pyscf Mole): pyscf Mole object. 
@@ -20,9 +17,10 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): Returns: tuple: Depending on return_ao_dip: - If False: (e, v) where: - - e (numpy ndarray): 1D array of orbital eigenvalues + - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues - v (numpy ndarray): 2D array (nao, nao) of MO coefficients - If True: (e, v, ao_dip) where ao_dip is 3D array (3, nao, nao) of AO dipole integrals + if field is not None, else None Raises: NotImplementedError: If field is specified with Hückel guess. @@ -72,8 +70,8 @@ def ext_field_generator(mol, field): def field_deriv(iat): p0, p1 = aoslices[iat] dmu_dr = np.zeros_like(int1e_irp) # dim(mu)×dim(r)×nao×nao - dmu_dr[:,:,p0:p1,:] -= int1e_irp[:,:,:,p0:p1].transpose((0,1,3,2)) # TODO not sure why minus - dmu_dr[:,:,:,p0:p1] -= int1e_irp[:,:,:,p0:p1] # TODO check/fix E definition + dmu_dr[:,:,p0:p1,:] -= int1e_irp[:,:,:,p0:p1].transpose((0,1,3,2)) + dmu_dr[:,:,:,p0:p1] -= int1e_irp[:,:,:,p0:p1] dhext_dr = np.einsum('x,xypq->ypq', field, dmu_dr) return dhext_dr return field_deriv @@ -115,35 +113,13 @@ def get_guess_orbitals_grad(mol, guess, field=None): return e, eigenvalue_grad(mol, e, c, s1, h1), de_dfield -def get_guess_dm(mol, guess, xc="pbe", openshell=None, field=None): - """Computes density matrix from guess Hamiltonian orbitals. - - Constructs the density matrix by occupying the lowest energy orbitals from - the guess Hamiltonian according to the aufbau principle. - - Args: - mol (pyscf Mole): pyscf Mole object. - guess (callable): Guess Hamiltonian method function from get_guess(). - xc (str): Exchange-correlation functional name. Defaults to 'pbe'. - openshell (bool, optional): If True, forces open-shell treatment even for closed-shell systems. - Defaults to None. - field (numpy ndarray, optional): 3-component uniform electric field (∇φ) in atomic units. - Defaults to None. - - Returns: - numpy ndarray: Density matrix in AO basis. 
- - Closed-shell: 2D array (nao, nao) - - Open-shell: 3D array (2, nao, nao) for alpha and beta separately - """ - _e, v = get_guess_orbitals(mol, guess, xc, field=field) - return get_dm(v, mol.nelec, mol.spin if mol.spin>0 or openshell is not None else None) - - def get_spahm_representation(mol, guess_in, xc="pbe", field=None): - """Computes the SPAHM (Single-Point Approximate Hamiltonian Method) molecular representation. + """Computes the ε-SPAHM molecular representation. - The SPAHM representation consists of the occupied orbital energies from a guess - Hamiltonian, providing a fast quantum-mechanical descriptor for machine learning. + Reference: + A. Fabrizio, K. R. Briling, C. Corminboeuf, + "SPAHM: the spectrum of approximated Hamiltonian matrices representations", + Digital Discovery 1 286-294 (2022), doi:10.1039/d1dd00050k. Args: mol (pyscf Mole): pyscf Mole object. @@ -155,7 +131,7 @@ def get_spahm_representation(mol, guess_in, xc="pbe", field=None): Returns: numpy ndarray: SPAHM representation consisting of occupied orbital eigenvalues. - Closed-shell: 1D array of shape (n_occupied,) in Eh - - Open-shell: 2D array of shape (2, n_alpha/n_beta) for alpha and beta orbitals + - Open-shell: 2D array of shape (2, n_alpha) for alpha and beta orbitals (padded by zeros) """ guess = get_guess(guess_in) e, _v = get_guess_orbitals(mol, guess, xc, field=field) @@ -178,11 +154,11 @@ def get_spahm_representation_grad(mol, guess_in, field=None): Returns: tuple: (spahm, spahm_grad, spahm_field_grad) where: - spahm (numpy ndarray): SPAHM representation - occupied orbital energies in Eh. - Shape: (n_occ,) for closed-shell or (2, n_alpha/n_beta) for open-shell + Shape: (n_occ,) for closed-shell or (2, n_alpha) for open-shell - spahm_grad (numpy ndarray): Nuclear gradients of SPAHM in Eh/bohr. 
- Shape: (n_occ, natm, 3) or (2, n_alpha/n_beta, natm, 3) + Shape: (n_occ, natm, 3) or (2, n_alpha, natm, 3) - spahm_field_grad (numpy ndarray or None): Electric field gradients in Eh/a.u. - Shape: (n_occ, 3) or (2, n_alpha/n_beta, 3), or None if field is None + Shape: (n_occ, 3) or (2, n_alpha, 3), or None if field is None """ guess = get_guess_g(guess_in) e, agrad, fgrad = get_guess_orbitals_grad(mol, guess, field=field) diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index 52a96d35..0cae23bc 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -1,321 +1,319 @@ import numpy as np from numpy import sqrt + def c_split(mol, c): - """Splits coefficient vector by angular momentum quantum number for each atom. - - Organizes expansion coefficients into sublists grouped by angular momentum (l) - for each atomic basis function. Only supports uncontracted basis sets. - - Args: - mol (pyscf Mole): pyscf Mole object. - c (numpy ndarray): 1D array of expansion coefficients. - - Returns: - list: List of [l, coefficients] pairs where l is angular momentum and - coefficients is the subset of c for that angular momentum shell. - - Note: - Works for uncontracted basis sets only. - """ - cs = [] - i0 = 0 - for at in mol.aoslice_by_atom(): - for b in range(at[0], at[1]): - l = mol.bas_angular(b) - msize = 2*l+1 - cs.append([l, c[i0:i0+msize]]) - i0 += msize - return cs + """Splits coefficient vector by angular momentum quantum number for each atom. + + Organizes expansion coefficients into sublists grouped by angular momentum (l) + for each atomic basis function. + + Args: + mol (pyscf Mole): pyscf Mole object. + c (numpy ndarray): 1D array of expansion coefficients. + + Returns: + list: List of [l, coefficients] pairs where l is angular momentum and + coefficients is the subset of c for that angular momentum shell. 
+ """ + cs = [] + i0 = 0 + for at in mol.aoslice_by_atom(): + for b in range(at[0], at[1]): + l = mol.bas_angular(b) + msize = 2*l+1 + for _n in range(mol.bas_nctr(b)): + cs.append([l, c[i0:i0+msize]]) + i0 += msize + return cs + def rotate_c(D, cs): - """Rotates coefficient vector using Wigner D-matrices. + """Rotates coefficient vector using real Wigner D-matrices. - Applies angular momentum rotation to each angular momentum block separately. + Applies angular momentum rotation to each angular momentum block separately. - Args: - D (list): List of Wigner D-matrices indexed by angular momentum l. - cs (list): List of [l, coefficients] pairs from c_split(). + Args: + D (list): List of Wigner D-matrices (numpy ndarray of shape (2l+1,(2l+1) indexed by angular momentum l. + cs (list): List of [l, coefficients] pairs from c_split(). + + Returns: + numpy ndarray: 1D array of rotated coefficients. + """ + return np.hstack([D[l] @ ci for l, ci in cs]) - Returns: - numpy ndarray: 1D array of rotated coefficients. - """ - c_new = [] - for l,ci in cs: - ci_new = D[l] @ ci - c_new.append(ci_new) - return np.hstack(c_new) def new_xy_axis(z): - """Constructs orthonormal coordinate system from a given z-axis. + """Constructs orthonormal coordinate system from a given z-axis. - Finds optimal x' and y' axes that form a right-handed orthonormal system - with the given z' direction. The algorithm chooses x' to have maximal - component along the original axis with minimal projection onto z'. + Finds optimal x' and y' axes that form a right-handed orthonormal system + with the given z' direction. The algorithm chooses x' to have maximal + component along the original axis with minimal projection onto z'. - Args: - z (numpy ndarray): 3D vector defining the new z-axis direction. + Args: + z (numpy ndarray): 3D vector defining the new z-axis direction. - Returns: - numpy ndarray: 3x3 rotation matrix with rows [x', y', z'] defining the - new orthonormal coordinate system. 
- """ - z = z/np.linalg.norm(z) # don't use /= so a copy of z is created - i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z - x = -z[i] * z - x[i] += 1.0 # create a vector orthogonal to z with dominant component i - x /= np.sqrt(1.0-z[i]*z[i]) # normalize - y = np.cross(z,x) - return np.array([x,y,z]) + Returns: + numpy ndarray: 3x3 rotation matrix with rows [x', y', z'] defining the + new orthonormal coordinate system. + """ + z = z/np.linalg.norm(z) # don't use /= so a copy of z is created + i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z + x = -z[i] * z + x[i] += 1.0 # create a vector orthogonal to z with dominant component i + x /= np.sqrt(1.0-z[i]*z[i]) # normalize + y = np.cross(z,x) + return np.array([x,y,z]) def Dmatrix(xyz, lmax, order='xyz'): - """Generates Wigner D-matrices for spatial rotation of spherical harmonics. - - Computes rotation matrices D^l for angular momenta l = 0 to lmax, where - D^l[m1, m2] transforms spherical harmonics under the specified rotation. - The rotation is defined by new axes x' = xyz[0], y' = xyz[1], z' = xyz[2]. - - Args: - xyz (numpy ndarray): 3x3 rotation matrix with rows defining new [x', y', z'] axes. - lmax (int): Maximum angular momentum (supports lmax <= 4). - order (str): Ordering convention for spherical harmonics. Defaults to 'xyz'. - - Returns: - list: List of numpy ndarrays D[l] where D[l] is the (2l+1) x (2l+1) Wigner - D-matrix for angular momentum l. Note: m1 index is rotated (D is transposed). - - Raises: - NotImplementedError: If lmax > 4. - - Note: - The matrices are computed using explicit algebraic expressions for each l. 
- """ - - xx = xyz[0,0]; xy = xyz[0,1]; xz = xyz[0,2] - yx = xyz[1,0]; yy = xyz[1,1]; yz = xyz[1,2] - zx = xyz[2,0]; zy = xyz[2,1]; zz = xyz[2,2] - - SQRT3 = sqrt(3.0) - - D = [np.zeros((2*l+1,2*l+1)) for l in range(lmax+1)] - - D[0][0,0] = 1.0 - - if lmax < 1: - return D - - l=1 - if order=='yzx': # -1 0 1 - D[1][l+ -1,l+ -1] = yy - D[1][l+ -1,l+ 0] = yz - D[1][l+ -1,l+ 1] = yx - D[1][l+ 0,l+ -1] = zy - D[1][l+ 0,l+ 0] = zz - D[1][l+ 0,l+ 1] = zx - D[1][l+ 1,l+ -1] = xy - D[1][l+ 1,l+ 0] = xz - D[1][l+ 1,l+ 1] = xx - elif order=='xyz': # 1 -1 0 - D[1][ 0, 0] = xx - D[1][ 0, 1] = xy - D[1][ 0, 2] = xz - D[1][ 1, 0] = yx - D[1][ 1, 1] = yy - D[1][ 1, 2] = yz - D[1][ 2, 0] = zx - D[1][ 2, 1] = zy - D[1][ 2, 2] = zz - - if lmax < 2: - return D - - l=2 - D[2][l+ -2,l+ -2] = xx*yy+xy*yx - D[2][l+ -2,l+ -1] = xy*yz+xz*yy - D[2][l+ -2,l+ 0] = xz*yz * SQRT3 - D[2][l+ -2,l+ 1] = xx*yz+xz*yx - D[2][l+ -2,l+ 2] = xx*yx-xy*yy - D[2][l+ -1,l+ -2] = yx*zy+yy*zx - D[2][l+ -1,l+ -1] = yy*zz+yz*zy - D[2][l+ -1,l+ 0] = yz*zz * SQRT3 - D[2][l+ -1,l+ 1] = yx*zz+yz*zx - D[2][l+ -1,l+ 2] = yx*zx-yy*zy - D[2][l+ 0,l+ -2] = zx*zy * SQRT3 - D[2][l+ 0,l+ -1] = zy*zz * SQRT3 - D[2][l+ 0,l+ 0] = 1.5*zz*zz - 0.5 - D[2][l+ 0,l+ 1] = zx*zz * SQRT3 - D[2][l+ 0,l+ 2] = (zx*zx-zy*zy) * 0.5 * SQRT3 - D[2][l+ 1,l+ -2] = xx*zy+xy*zx - D[2][l+ 1,l+ -1] = xy*zz+xz*zy - D[2][l+ 1,l+ 0] = xz*zz * SQRT3 - D[2][l+ 1,l+ 1] = xx*zz+xz*zx - D[2][l+ 1,l+ 2] = xx*zx-xy*zy - D[2][l+ 2,l+ -2] = xx*xy-yx*yy - D[2][l+ 2,l+ -1] = xy*xz-yy*yz - D[2][l+ 2,l+ 0] = (xz*xz-yz*yz) * 0.5 * SQRT3 - D[2][l+ 2,l+ 1] = xx*xz-yx*yz - D[2][l+ 2,l+ 2] = (xx*xx-xy*xy+yy*yy-yx*yx) * 0.5 - - if lmax < 3: - return D - - l=3 - D[3][l+ -3,l+ -3] = 3*xx**2*yy/4 + 3*xx*xy*yx/2 - 3*xy**2*yy/4 - 3*yx**2*yy/4 + yy**3/4 - D[3][l+ -3,l+ -2] = sqrt(6)*(xx*xy*yz + xx*xz*yy + xy*xz*yx - yx*yy*yz)/2 - D[3][l+ -3,l+ -1] = sqrt(15)*(-xx**2*yy - 2*xx*xy*yx - 3*xy**2*yy + 8*xy*xz*yz + 4*xz**2*yy + yx**2*yy + yy**3 - 4*yy*yz**2)/20 - D[3][l+ -3,l+ 0] = 
sqrt(10)*(-3*xx**2*yz - 6*xx*xz*yx - 3*xy**2*yz - 6*xy*xz*yy + 6*xz**2*yz + 3*yx**2*yz + 3*yy**2*yz - 2*yz**3)/20 - D[3][l+ -3,l+ 1] = sqrt(15)*(-3*xx**2*yx - 2*xx*xy*yy + 8*xx*xz*yz - xy**2*yx + 4*xz**2*yx + yx**3 + yx*yy**2 - 4*yx*yz**2)/20 - D[3][l+ -3,l+ 2] = sqrt(6)*(xx**2*yz + 2*xx*xz*yx - xy**2*yz - 2*xy*xz*yy - yx**2*yz + yy**2*yz)/4 - D[3][l+ -3,l+ 3] = 3*xx**2*yx/4 - 3*xx*xy*yy/2 - 3*xy**2*yx/4 - yx**3/4 + 3*yx*yy**2/4 - D[3][l+ -2,l+ -3] = sqrt(6)*(xx*yx*zy + xx*yy*zx + xy*yx*zx - xy*yy*zy)/2 - D[3][l+ -2,l+ -2] = xx*yy*zz + xx*yz*zy + xy*yx*zz + xy*yz*zx + xz*yx*zy + xz*yy*zx - D[3][l+ -2,l+ -1] = sqrt(10)*(-xx*yx*zy - xx*yy*zx - xy*yx*zx - 3*xy*yy*zy + 4*xy*yz*zz + 4*xz*yy*zz + 4*xz*yz*zy)/10 - D[3][l+ -2,l+ 0] = sqrt(15)*(-xx*yx*zz - xx*yz*zx - xy*yy*zz - xy*yz*zy - xz*yx*zx - xz*yy*zy + 2*xz*yz*zz)/5 - D[3][l+ -2,l+ 1] = sqrt(10)*(-3*xx*yx*zx - xx*yy*zy + 4*xx*yz*zz - xy*yx*zy - xy*yy*zx + 4*xz*yx*zz + 4*xz*yz*zx)/10 - D[3][l+ -2,l+ 2] = xx*yx*zz + xx*yz*zx - xy*yy*zz - xy*yz*zy + xz*yx*zx - xz*yy*zy - D[3][l+ -2,l+ 3] = sqrt(6)*(xx*yx*zx - xx*yy*zy - xy*yx*zy - xy*yy*zx)/2 - D[3][l+ -1,l+ -3] = sqrt(15)*(2*yx*zx*zy + yy*zx**2 - yy*zy**2)/4 - D[3][l+ -1,l+ -2] = sqrt(10)*(yx*zy*zz + yy*zx*zz + yz*zx*zy)/2 - D[3][l+ -1,l+ -1] = -yx*zx*zy/2 - yy*zx**2/4 - 3*yy*zy**2/4 + yy*zz**2 + 2*yz*zy*zz - D[3][l+ -1,l+ 0] = sqrt(6)*(-2*yx*zx*zz - 2*yy*zy*zz - yz*zx**2 - yz*zy**2 + 2*yz*zz**2)/4 - D[3][l+ -1,l+ 1] = -3*yx*zx**2/4 - yx*zy**2/4 + yx*zz**2 - yy*zx*zy/2 + 2*yz*zx*zz - D[3][l+ -1,l+ 2] = sqrt(10)*(2*yx*zx*zz - 2*yy*zy*zz + yz*zx**2 - yz*zy**2)/4 - D[3][l+ -1,l+ 3] = sqrt(15)*(yx*zx**2 - yx*zy**2 - 2*yy*zx*zy)/4 - D[3][l+ 0,l+ -3] = sqrt(10)*zy*(3*zx**2 - zy**2)/4 - D[3][l+ 0,l+ -2] = sqrt(15)*zx*zy*zz - D[3][l+ 0,l+ -1] = sqrt(6)*zy*(5*zz**2 - 1)/4 - D[3][l+ 0,l+ 0] = zz*(-3*zx**2 - 3*zy**2 + 2*zz**2)/2 - D[3][l+ 0,l+ 1] = sqrt(6)*zx*(5*zz**2 - 1)/4 - D[3][l+ 0,l+ 2] = sqrt(15)*zz*(zx - zy)*(zx + zy)/2 - D[3][l+ 0,l+ 3] = sqrt(10)*zx*(zx**2 - 3*zy**2)/4 
- D[3][l+ 1,l+ -3] = sqrt(15)*(2*xx*zx*zy + xy*zx**2 - xy*zy**2)/4 - D[3][l+ 1,l+ -2] = sqrt(10)*(xx*zy*zz + xy*zx*zz + xz*zx*zy)/2 - D[3][l+ 1,l+ -1] = -xx*zx*zy/2 - xy*zx**2/4 - 3*xy*zy**2/4 + xy*zz**2 + 2*xz*zy*zz - D[3][l+ 1,l+ 0] = sqrt(6)*(-2*xx*zx*zz - 2*xy*zy*zz - xz*zx**2 - xz*zy**2 + 2*xz*zz**2)/4 - D[3][l+ 1,l+ 1] = -3*xx*zx**2/4 - xx*zy**2/4 + xx*zz**2 - xy*zx*zy/2 + 2*xz*zx*zz - D[3][l+ 1,l+ 2] = sqrt(10)*(2*xx*zx*zz - 2*xy*zy*zz + xz*zx**2 - xz*zy**2)/4 - D[3][l+ 1,l+ 3] = sqrt(15)*(xx*zx**2 - xx*zy**2 - 2*xy*zx*zy)/4 - D[3][l+ 2,l+ -3] = sqrt(6)*(xx**2*zy + 2*xx*xy*zx - xy**2*zy - yx**2*zy - 2*yx*yy*zx + yy**2*zy)/4 - D[3][l+ 2,l+ -2] = xx*xy*zz + xx*xz*zy + xy*xz*zx - yx*yy*zz - yx*yz*zy - yy*yz*zx - D[3][l+ 2,l+ -1] = sqrt(10)*(-xx**2*zy - 2*xx*xy*zx - 3*xy**2*zy + 8*xy*xz*zz + 4*xz**2*zy + yx**2*zy + 2*yx*yy*zx + 3*yy**2*zy - 8*yy*yz*zz - 4*yz**2*zy)/20 - D[3][l+ 2,l+ 0] = sqrt(15)*(-xx**2*zz - 2*xx*xz*zx - xy**2*zz - 2*xy*xz*zy + 2*xz**2*zz + yx**2*zz + 2*yx*yz*zx + yy**2*zz + 2*yy*yz*zy - 2*yz**2*zz)/10 - D[3][l+ 2,l+ 1] = sqrt(10)*(-3*xx**2*zx - 2*xx*xy*zy + 8*xx*xz*zz - xy**2*zx + 4*xz**2*zx + 3*yx**2*zx + 2*yx*yy*zy - 8*yx*yz*zz + yy**2*zx - 4*yz**2*zx)/20 - D[3][l+ 2,l+ 2] = xx**2*zz/2 + xx*xz*zx - xy**2*zz/2 - xy*xz*zy - yx**2*zz/2 - yx*yz*zx + yy**2*zz/2 + yy*yz*zy - D[3][l+ 2,l+ 3] = sqrt(6)*(xx**2*zx - 2*xx*xy*zy - xy**2*zx - yx**2*zx + 2*yx*yy*zy + yy**2*zx)/4 - D[3][l+ 3,l+ -3] = 3*xx**2*xy/4 - 3*xx*yx*yy/2 - xy**3/4 - 3*xy*yx**2/4 + 3*xy*yy**2/4 - D[3][l+ 3,l+ -2] = sqrt(6)*(xx*xy*xz - xx*yy*yz - xy*yx*yz - xz*yx*yy)/2 - D[3][l+ 3,l+ -1] = sqrt(15)*(-xx**2*xy + 2*xx*yx*yy - xy**3 + 4*xy*xz**2 + xy*yx**2 + 3*xy*yy**2 - 4*xy*yz**2 - 8*xz*yy*yz)/20 - D[3][l+ 3,l+ 0] = sqrt(10)*(-3*xx**2*xz + 6*xx*yx*yz - 3*xy**2*xz + 6*xy*yy*yz + 2*xz**3 + 3*xz*yx**2 + 3*xz*yy**2 - 6*xz*yz**2)/20 - D[3][l+ 3,l+ 1] = sqrt(15)*(-xx**3 - xx*xy**2 + 4*xx*xz**2 + 3*xx*yx**2 + xx*yy**2 - 4*xx*yz**2 + 2*xy*yx*yy - 8*xz*yx*yz)/20 - D[3][l+ 3,l+ 2] = 
sqrt(6)*(xx**2*xz - 2*xx*yx*yz - xy**2*xz + 2*xy*yy*yz - xz*yx**2 + xz*yy**2)/4 - D[3][l+ 3,l+ 3] = xx**3/4 - 3*xx*xy**2/4 - 3*xx*yx**2/4 + 3*xx*yy**2/4 + 3*xy*yx*yy/2 - - if lmax < 4: - return D - - l=4 - D[4][l+ -4,l+-4] = xx**3*yy/2 + 3*xx**2*xy*yx/2 - 3*xx*xy**2*yy/2 - 3*xx*yx**2*yy/2 + xx*yy**3/2 - xy**3*yx/2 - xy*yx**3/2 + 3*xy*yx*yy**2/2 - D[4][l+ -4,l+-3] = sqrt(2)*(3*xx**2*xy*yz + 3*xx**2*xz*yy + 6*xx*xy*xz*yx - 6*xx*yx*yy*yz - xy**3*yz - 3*xy**2*xz*yy - 3*xy*yx**2*yz + 3*xy*yy**2*yz - 3*xz*yx**2*yy + xz*yy**3)/4 - D[4][l+ -4,l+-2] = sqrt(7)*(-xx**3*yy - 3*xx**2*xy*yx - 3*xx*xy**2*yy + 12*xx*xy*xz*yz + 6*xx*xz**2*yy + 3*xx*yx**2*yy + xx*yy**3 - 6*xx*yy*yz**2 - xy**3*yx + 6*xy*xz**2*yx + xy*yx**3 + 3*xy*yx*yy**2 - 6*xy*yx*yz**2 - 12*xz*yx*yy*yz)/14 - D[4][l+ -4,l+-1] = sqrt(14)*(-3*xx**2*xy*yz - 3*xx**2*xz*yy - 6*xx*xy*xz*yx + 6*xx*yx*yy*yz - 3*xy**3*yz - 9*xy**2*xz*yy + 12*xy*xz**2*yz + 3*xy*yx**2*yz + 9*xy*yy**2*yz - 4*xy*yz**3 + 4*xz**3*yy + 3*xz*yx**2*yy + 3*xz*yy**3 - 12*xz*yy*yz**2)/28 - D[4][l+ -4,l+ 0] = sqrt(35)*(3*xx**3*yx + 3*xx**2*xy*yy - 12*xx**2*xz*yz + 3*xx*xy**2*yx - 12*xx*xz**2*yx - 3*xx*yx**3 - 3*xx*yx*yy**2 + 12*xx*yx*yz**2 + 3*xy**3*yy - 12*xy**2*xz*yz - 12*xy*xz**2*yy - 3*xy*yx**2*yy - 3*xy*yy**3 + 12*xy*yy*yz**2 + 8*xz**3*yz + 12*xz*yx**2*yz + 12*xz*yy**2*yz - 8*xz*yz**3)/70 - D[4][l+ -4,l+ 1] = sqrt(14)*(-3*xx**3*yz - 9*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy + 12*xx*xz**2*yz + 9*xx*yx**2*yz + 3*xx*yy**2*yz - 4*xx*yz**3 - 3*xy**2*xz*yx + 6*xy*yx*yy*yz + 4*xz**3*yx + 3*xz*yx**3 + 3*xz*yx*yy**2 - 12*xz*yx*yz**2)/28 - D[4][l+ -4,l+ 2] = sqrt(7)*(-xx**3*yx + 3*xx**2*xz*yz + 3*xx*xz**2*yx + xx*yx**3 - 3*xx*yx*yz**2 + xy**3*yy - 3*xy**2*xz*yz - 3*xy*xz**2*yy - xy*yy**3 + 3*xy*yy*yz**2 - 3*xz*yx**2*yz + 3*xz*yy**2*yz)/7 - D[4][l+ -4,l+ 3] = sqrt(2)*(xx**3*yz + 3*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy - 3*xx*yx**2*yz + 3*xx*yy**2*yz - 3*xy**2*xz*yx + 6*xy*yx*yy*yz - xz*yx**3 + 3*xz*yx*yy**2)/4 - D[4][l+ -4,l+ 4] = xx**3*yx/2 - 
3*xx**2*xy*yy/2 - 3*xx*xy**2*yx/2 - xx*yx**3/2 + 3*xx*yx*yy**2/2 + xy**3*yy/2 + 3*xy*yx**2*yy/2 - xy*yy**3/2 - D[4][l+ -3,l+-4] = sqrt(2)*(3*xx**2*yx*zy + 3*xx**2*yy*zx + 6*xx*xy*yx*zx - 6*xx*xy*yy*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx - yx**3*zy - 3*yx**2*yy*zx + 3*yx*yy**2*zy + yy**3*zx)/4 - D[4][l+ -3,l+-3] = 3*xx**2*yy*zz/4 + 3*xx**2*yz*zy/4 + 3*xx*xy*yx*zz/2 + 3*xx*xy*yz*zx/2 + 3*xx*xz*yx*zy/2 + 3*xx*xz*yy*zx/2 - 3*xy**2*yy*zz/4 - 3*xy**2*yz*zy/4 + 3*xy*xz*yx*zx/2 - 3*xy*xz*yy*zy/2 - 3*yx**2*yy*zz/4 - 3*yx**2*yz*zy/4 - 3*yx*yy*yz*zx/2 + yy**3*zz/4 + 3*yy**2*yz*zy/4 - D[4][l+ -3,l+-2] = sqrt(14)*(-3*xx**2*yx*zy - 3*xx**2*yy*zx - 6*xx*xy*yx*zx - 6*xx*xy*yy*zy + 12*xx*xy*yz*zz + 12*xx*xz*yy*zz + 12*xx*xz*yz*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx + 12*xy*xz*yx*zz + 12*xy*xz*yz*zx + 6*xz**2*yx*zy + 6*xz**2*yy*zx + yx**3*zy + 3*yx**2*yy*zx + 3*yx*yy**2*zy - 12*yx*yy*yz*zz - 6*yx*yz**2*zy + yy**3*zx - 6*yy*yz**2*zx)/28 - D[4][l+ -3,l+-1] = sqrt(7)*(-3*xx**2*yy*zz - 3*xx**2*yz*zy - 6*xx*xy*yx*zz - 6*xx*xy*yz*zx - 6*xx*xz*yx*zy - 6*xx*xz*yy*zx - 9*xy**2*yy*zz - 9*xy**2*yz*zy - 6*xy*xz*yx*zx - 18*xy*xz*yy*zy + 24*xy*xz*yz*zz + 12*xz**2*yy*zz + 12*xz**2*yz*zy + 3*yx**2*yy*zz + 3*yx**2*yz*zy + 6*yx*yy*yz*zx + 3*yy**3*zz + 9*yy**2*yz*zy - 12*yy*yz**2*zz - 4*yz**3*zy)/28 - D[4][l+ -3,l+ 0] = sqrt(70)*(9*xx**2*yx*zx + 3*xx**2*yy*zy - 12*xx**2*yz*zz + 6*xx*xy*yx*zy + 6*xx*xy*yy*zx - 24*xx*xz*yx*zz - 24*xx*xz*yz*zx + 3*xy**2*yx*zx + 9*xy**2*yy*zy - 12*xy**2*yz*zz - 24*xy*xz*yy*zz - 24*xy*xz*yz*zy - 12*xz**2*yx*zx - 12*xz**2*yy*zy + 24*xz**2*yz*zz - 3*yx**3*zx - 3*yx**2*yy*zy + 12*yx**2*yz*zz - 3*yx*yy**2*zx + 12*yx*yz**2*zx - 3*yy**3*zy + 12*yy**2*yz*zz + 12*yy*yz**2*zy - 8*yz**3*zz)/140 - D[4][l+ -3,l+ 1] = sqrt(7)*(-9*xx**2*yx*zz - 9*xx**2*yz*zx - 6*xx*xy*yy*zz - 6*xx*xy*yz*zy - 18*xx*xz*yx*zx - 6*xx*xz*yy*zy + 24*xx*xz*yz*zz - 3*xy**2*yx*zz - 3*xy**2*yz*zx - 6*xy*xz*yx*zy - 6*xy*xz*yy*zx + 12*xz**2*yx*zz + 12*xz**2*yz*zx + 3*yx**3*zz + 9*yx**2*yz*zx + 3*yx*yy**2*zz + 6*yx*yy*yz*zy 
- 12*yx*yz**2*zz + 3*yy**2*yz*zx - 4*yz**3*zx)/28 - D[4][l+ -3,l+ 2] = sqrt(14)*(-3*xx**2*yx*zx + 3*xx**2*yz*zz + 6*xx*xz*yx*zz + 6*xx*xz*yz*zx + 3*xy**2*yy*zy - 3*xy**2*yz*zz - 6*xy*xz*yy*zz - 6*xy*xz*yz*zy + 3*xz**2*yx*zx - 3*xz**2*yy*zy + yx**3*zx - 3*yx**2*yz*zz - 3*yx*yz**2*zx - yy**3*zy + 3*yy**2*yz*zz + 3*yy*yz**2*zy)/14 - D[4][l+ -3,l+ 3] = 3*xx**2*yx*zz/4 + 3*xx**2*yz*zx/4 - 3*xx*xy*yy*zz/2 - 3*xx*xy*yz*zy/2 + 3*xx*xz*yx*zx/2 - 3*xx*xz*yy*zy/2 - 3*xy**2*yx*zz/4 - 3*xy**2*yz*zx/4 - 3*xy*xz*yx*zy/2 - 3*xy*xz*yy*zx/2 - yx**3*zz/4 - 3*yx**2*yz*zx/4 + 3*yx*yy**2*zz/4 + 3*yx*yy*yz*zy/2 + 3*yy**2*yz*zx/4 - D[4][l+ -3,l+ 4] = sqrt(2)*(3*xx**2*yx*zx - 3*xx**2*yy*zy - 6*xx*xy*yx*zy - 6*xx*xy*yy*zx - 3*xy**2*yx*zx + 3*xy**2*yy*zy - yx**3*zx + 3*yx**2*yy*zy + 3*yx*yy**2*zx - yy**3*zy)/4 - D[4][l+ -2,l+-4] = sqrt(7)*(2*xx*yx*zx*zy + xx*yy*zx**2 - xx*yy*zy**2 + xy*yx*zx**2 - xy*yx*zy**2 - 2*xy*yy*zx*zy)/2 - D[4][l+ -2,l+-3] = sqrt(14)*(2*xx*yx*zy*zz + 2*xx*yy*zx*zz + 2*xx*yz*zx*zy + 2*xy*yx*zx*zz - 2*xy*yy*zy*zz + xy*yz*zx**2 - xy*yz*zy**2 + 2*xz*yx*zx*zy + xz*yy*zx**2 - xz*yy*zy**2)/4 - D[4][l+ -2,l+-2] = -xx*yx*zx*zy - xx*yy*zx**2/2 - xx*yy*zy**2/2 + xx*yy*zz**2 + 2*xx*yz*zy*zz - xy*yx*zx**2/2 - xy*yx*zy**2/2 + xy*yx*zz**2 - xy*yy*zx*zy + 2*xy*yz*zx*zz + 2*xz*yx*zy*zz + 2*xz*yy*zx*zz + 2*xz*yz*zx*zy - D[4][l+ -2,l+-1] = sqrt(2)*(-2*xx*yx*zy*zz - 2*xx*yy*zx*zz - 2*xx*yz*zx*zy - 2*xy*yx*zx*zz - 6*xy*yy*zy*zz - xy*yz*zx**2 - 3*xy*yz*zy**2 + 4*xy*yz*zz**2 - 2*xz*yx*zx*zy - xz*yy*zx**2 - 3*xz*yy*zy**2 + 4*xz*yy*zz**2 + 8*xz*yz*zy*zz)/4 - D[4][l+ -2,l+ 0] = sqrt(5)*(3*xx*yx*zx**2 + xx*yx*zy**2 - 4*xx*yx*zz**2 + 2*xx*yy*zx*zy - 8*xx*yz*zx*zz + 2*xy*yx*zx*zy + xy*yy*zx**2 + 3*xy*yy*zy**2 - 4*xy*yy*zz**2 - 8*xy*yz*zy*zz - 8*xz*yx*zx*zz - 8*xz*yy*zy*zz - 4*xz*yz*zx**2 - 4*xz*yz*zy**2 + 8*xz*yz*zz**2)/10 - D[4][l+ -2,l+ 1] = sqrt(2)*(-6*xx*yx*zx*zz - 2*xx*yy*zy*zz - 3*xx*yz*zx**2 - xx*yz*zy**2 + 4*xx*yz*zz**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy - 3*xz*yx*zx**2 - 
xz*yx*zy**2 + 4*xz*yx*zz**2 - 2*xz*yy*zx*zy + 8*xz*yz*zx*zz)/4 - D[4][l+ -2,l+ 2] = -xx*yx*zx**2 + xx*yx*zz**2 + 2*xx*yz*zx*zz + xy*yy*zy**2 - xy*yy*zz**2 - 2*xy*yz*zy*zz + 2*xz*yx*zx*zz - 2*xz*yy*zy*zz + xz*yz*zx**2 - xz*yz*zy**2 - D[4][l+ -2,l+ 3] = sqrt(14)*(2*xx*yx*zx*zz - 2*xx*yy*zy*zz + xx*yz*zx**2 - xx*yz*zy**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy + xz*yx*zx**2 - xz*yx*zy**2 - 2*xz*yy*zx*zy)/4 - D[4][l+ -2,l+ 4] = sqrt(7)*(xx*yx*zx**2 - xx*yx*zy**2 - 2*xx*yy*zx*zy - 2*xy*yx*zx*zy - xy*yy*zx**2 + xy*yy*zy**2)/2 - D[4][l+ -1,l+-4] = sqrt(14)*(3*yx*zx**2*zy - yx*zy**3 + yy*zx**3 - 3*yy*zx*zy**2)/4 - D[4][l+ -1,l+-3] = sqrt(7)*(6*yx*zx*zy*zz + 3*yy*zx**2*zz - 3*yy*zy**2*zz + 3*yz*zx**2*zy - yz*zy**3)/4 - D[4][l+ -1,l+-2] = sqrt(2)*(-3*yx*zx**2*zy - yx*zy**3 + 6*yx*zy*zz**2 - yy*zx**3 - 3*yy*zx*zy**2 + 6*yy*zx*zz**2 + 12*yz*zx*zy*zz)/4 - D[4][l+ -1,l+-1] = -3*yx*zx*zy*zz/2 - 3*yy*zx**2*zz/4 - 9*yy*zy**2*zz/4 + yy*zz**3 - 3*yz*zx**2*zy/4 - 3*yz*zy**3/4 + 3*yz*zy*zz**2 - D[4][l+ -1,l+ 0] = sqrt(10)*(3*yx*zx**3 + 3*yx*zx*zy**2 - 12*yx*zx*zz**2 + 3*yy*zx**2*zy + 3*yy*zy**3 - 12*yy*zy*zz**2 - 12*yz*zx**2*zz - 12*yz*zy**2*zz + 8*yz*zz**3)/20 - D[4][l+ -1,l+ 1] = -9*yx*zx**2*zz/4 - 3*yx*zy**2*zz/4 + yx*zz**3 - 3*yy*zx*zy*zz/2 - 3*yz*zx**3/4 - 3*yz*zx*zy**2/4 + 3*yz*zx*zz**2 - D[4][l+ -1,l+ 2] = sqrt(2)*(-yx*zx**3 + 3*yx*zx*zz**2 + yy*zy**3 - 3*yy*zy*zz**2 + 3*yz*zx**2*zz - 3*yz*zy**2*zz)/2 - D[4][l+ -1,l+ 3] = sqrt(7)*(3*yx*zx**2*zz - 3*yx*zy**2*zz - 6*yy*zx*zy*zz + yz*zx**3 - 3*yz*zx*zy**2)/4 - D[4][l+ -1,l+ 4] = sqrt(14)*(yx*zx**3 - 3*yx*zx*zy**2 - 3*yy*zx**2*zy + yy*zy**3)/4 - D[4][l+ 0,l+-4] = sqrt(35)*zx*zy*(zx - zy)*(zx + zy)/2 - D[4][l+ 0,l+-3] = sqrt(70)*zy*zz*(3*zx**2 - zy**2)/4 - D[4][l+ 0,l+-2] = sqrt(5)*zx*zy*(7*zz**2 - 1)/2 - D[4][l+ 0,l+-1] = sqrt(10)*zy*zz*(-3*zx**2 - 3*zy**2 + 4*zz**2)/4 - D[4][l+ 0,l+ 0] = 3*zx**4/8 + 3*zx**2*zy**2/4 - 3*zx**2*zz**2 + 3*zy**4/8 - 3*zy**2*zz**2 + zz**4 - D[4][l+ 0,l+ 1] = sqrt(10)*zx*zz*(-3*zx**2 - 3*zy**2 + 
4*zz**2)/4 - D[4][l+ 0,l+ 2] = sqrt(5)*(zx - zy)*(zx + zy)*(7*zz**2 - 1)/4 - D[4][l+ 0,l+ 3] = sqrt(70)*zx*zz*(zx**2 - 3*zy**2)/4 - D[4][l+ 0,l+ 4] = sqrt(35)*(zx**4 - 6*zx**2*zy**2 + zy**4)/8 - D[4][l+ 1,l+-4] = sqrt(14)*(3*xx*zx**2*zy - xx*zy**3 + xy*zx**3 - 3*xy*zx*zy**2)/4 - D[4][l+ 1,l+-3] = sqrt(7)*(6*xx*zx*zy*zz + 3*xy*zx**2*zz - 3*xy*zy**2*zz + 3*xz*zx**2*zy - xz*zy**3)/4 - D[4][l+ 1,l+-2] = sqrt(2)*(-3*xx*zx**2*zy - xx*zy**3 + 6*xx*zy*zz**2 - xy*zx**3 - 3*xy*zx*zy**2 + 6*xy*zx*zz**2 + 12*xz*zx*zy*zz)/4 - D[4][l+ 1,l+-1] = -3*xx*zx*zy*zz/2 - 3*xy*zx**2*zz/4 - 9*xy*zy**2*zz/4 + xy*zz**3 - 3*xz*zx**2*zy/4 - 3*xz*zy**3/4 + 3*xz*zy*zz**2 - D[4][l+ 1,l+ 0] = sqrt(10)*(3*xx*zx**3 + 3*xx*zx*zy**2 - 12*xx*zx*zz**2 + 3*xy*zx**2*zy + 3*xy*zy**3 - 12*xy*zy*zz**2 - 12*xz*zx**2*zz - 12*xz*zy**2*zz + 8*xz*zz**3)/20 - D[4][l+ 1,l+ 1] = -9*xx*zx**2*zz/4 - 3*xx*zy**2*zz/4 + xx*zz**3 - 3*xy*zx*zy*zz/2 - 3*xz*zx**3/4 - 3*xz*zx*zy**2/4 + 3*xz*zx*zz**2 - D[4][l+ 1,l+ 2] = sqrt(2)*(-xx*zx**3 + 3*xx*zx*zz**2 + xy*zy**3 - 3*xy*zy*zz**2 + 3*xz*zx**2*zz - 3*xz*zy**2*zz)/2 - D[4][l+ 1,l+ 3] = sqrt(7)*(3*xx*zx**2*zz - 3*xx*zy**2*zz - 6*xy*zx*zy*zz + xz*zx**3 - 3*xz*zx*zy**2)/4 - D[4][l+ 1,l+ 4] = sqrt(14)*(xx*zx**3 - 3*xx*zx*zy**2 - 3*xy*zx**2*zy + xy*zy**3)/4 - D[4][l+ 2,l+-4] = sqrt(7)*(-xx**3*xy + 3*xx**2*zx*zy + xx*xy**3 + 3*xx*xy*zx**2 - 3*xx*xy*zy**2 - 3*xy**2*zx*zy + yx**3*yy - 3*yx**2*zx*zy - yx*yy**3 - 3*yx*yy*zx**2 + 3*yx*yy*zy**2 + 3*yy**2*zx*zy)/7 - D[4][l+ 2,l+-3] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*zy*zz + 6*xx*xy*zx*zz + 6*xx*xz*zx*zy + xy**3*xz - 3*xy**2*zy*zz + 3*xy*xz*zx**2 - 3*xy*xz*zy**2 + 3*yx**2*yy*yz - 3*yx**2*zy*zz - 6*yx*yy*zx*zz - 6*yx*yz*zx*zy - yy**3*yz + 3*yy**2*zy*zz - 3*yy*yz*zx**2 + 3*yy*yz*zy**2)/14 - D[4][l+ 2,l+-2] = xx**3*xy/7 - 3*xx**2*zx*zy/7 + xx*xy**3/7 - 6*xx*xy*xz**2/7 - 3*xx*xy*zx**2/7 - 3*xx*xy*zy**2/7 + 6*xx*xy*zz**2/7 + 12*xx*xz*zy*zz/7 - 3*xy**2*zx*zy/7 + 12*xy*xz*zx*zz/7 + 6*xz**2*zx*zy/7 - yx**3*yy/7 + 3*yx**2*zx*zy/7 - yx*yy**3/7 + 
6*yx*yy*yz**2/7 + 3*yx*yy*zx**2/7 + 3*yx*yy*zy**2/7 - 6*yx*yy*zz**2/7 - 12*yx*yz*zy*zz/7 + 3*yy**2*zx*zy/7 - 12*yy*yz*zx*zz/7 - 6*yz**2*zx*zy/7 - D[4][l+ 2,l+-1] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*zy*zz - 6*xx*xy*zx*zz - 6*xx*xz*zx*zy + 3*xy**3*xz - 9*xy**2*zy*zz - 4*xy*xz**3 - 3*xy*xz*zx**2 - 9*xy*xz*zy**2 + 12*xy*xz*zz**2 + 12*xz**2*zy*zz - 3*yx**2*yy*yz + 3*yx**2*zy*zz + 6*yx*yy*zx*zz + 6*yx*yz*zx*zy - 3*yy**3*yz + 9*yy**2*zy*zz + 4*yy*yz**3 + 3*yy*yz*zx**2 + 9*yy*yz*zy**2 - 12*yy*yz*zz**2 - 12*yz**2*zy*zz)/14 - D[4][l+ 2,l+ 0] = sqrt(5)*(-3*xx**4 - 6*xx**2*xy**2 + 24*xx**2*xz**2 + 18*xx**2*zx**2 + 6*xx**2*zy**2 - 24*xx**2*zz**2 + 24*xx*xy*zx*zy - 96*xx*xz*zx*zz - 3*xy**4 + 24*xy**2*xz**2 + 6*xy**2*zx**2 + 18*xy**2*zy**2 - 24*xy**2*zz**2 - 96*xy*xz*zy*zz - 8*xz**4 - 24*xz**2*zx**2 - 24*xz**2*zy**2 + 48*xz**2*zz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 - 18*yx**2*zx**2 - 6*yx**2*zy**2 + 24*yx**2*zz**2 - 24*yx*yy*zx*zy + 96*yx*yz*zx*zz + 3*yy**4 - 24*yy**2*yz**2 - 6*yy**2*zx**2 - 18*yy**2*zy**2 + 24*yy**2*zz**2 + 96*yy*yz*zy*zz + 8*yz**4 + 24*yz**2*zx**2 + 24*yz**2*zy**2 - 48*yz**2*zz**2)/140 - D[4][l+ 2,l+ 1] = sqrt(2)*(3*xx**3*xz - 9*xx**2*zx*zz + 3*xx*xy**2*xz - 6*xx*xy*zy*zz - 4*xx*xz**3 - 9*xx*xz*zx**2 - 3*xx*xz*zy**2 + 12*xx*xz*zz**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + 12*xz**2*zx*zz - 3*yx**3*yz + 9*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz + 4*yx*yz**3 + 9*yx*yz*zx**2 + 3*yx*yz*zy**2 - 12*yx*yz*zz**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy - 12*yz**2*zx*zz)/14 - D[4][l+ 2,l+ 2] = xx**4/14 - 3*xx**2*xz**2/7 - 3*xx**2*zx**2/7 + 3*xx**2*zz**2/7 + 12*xx*xz*zx*zz/7 - xy**4/14 + 3*xy**2*xz**2/7 + 3*xy**2*zy**2/7 - 3*xy**2*zz**2/7 - 12*xy*xz*zy*zz/7 + 3*xz**2*zx**2/7 - 3*xz**2*zy**2/7 - yx**4/14 + 3*yx**2*yz**2/7 + 3*yx**2*zx**2/7 - 3*yx**2*zz**2/7 - 12*yx*yz*zx*zz/7 + yy**4/14 - 3*yy**2*yz**2/7 - 3*yy**2*zy**2/7 + 3*yy**2*zz**2/7 + 12*yy*yz*zy*zz/7 - 3*yz**2*zx**2/7 + 3*yz**2*zy**2/7 - D[4][l+ 2,l+ 3] = sqrt(14)*(-xx**3*xz + 3*xx**2*zx*zz + 3*xx*xy**2*xz - 
6*xx*xy*zy*zz + 3*xx*xz*zx**2 - 3*xx*xz*zy**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + yx**3*yz - 3*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz - 3*yx*yz*zx**2 + 3*yx*yz*zy**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy)/14 - D[4][l+ 2,l+ 4] = sqrt(7)*(-xx**4 + 6*xx**2*xy**2 + 6*xx**2*zx**2 - 6*xx**2*zy**2 - 24*xx*xy*zx*zy - xy**4 - 6*xy**2*zx**2 + 6*xy**2*zy**2 + yx**4 - 6*yx**2*yy**2 - 6*yx**2*zx**2 + 6*yx**2*zy**2 + 24*yx*yy*zx*zy + yy**4 + 6*yy**2*zx**2 - 6*yy**2*zy**2)/28 - D[4][l+ 3,l+-4] = sqrt(2)*(xx**3*zy + 3*xx**2*xy*zx - 3*xx*xy**2*zy - 3*xx*yx**2*zy - 6*xx*yx*yy*zx + 3*xx*yy**2*zy - xy**3*zx - 3*xy*yx**2*zx + 6*xy*yx*yy*zy + 3*xy*yy**2*zx)/4 - D[4][l+ 3,l+-3] = 3*xx**2*xy*zz/4 + 3*xx**2*xz*zy/4 + 3*xx*xy*xz*zx/2 - 3*xx*yx*yy*zz/2 - 3*xx*yx*yz*zy/2 - 3*xx*yy*yz*zx/2 - xy**3*zz/4 - 3*xy**2*xz*zy/4 - 3*xy*yx**2*zz/4 - 3*xy*yx*yz*zx/2 + 3*xy*yy**2*zz/4 + 3*xy*yy*yz*zy/2 - 3*xz*yx**2*zy/4 - 3*xz*yx*yy*zx/2 + 3*xz*yy**2*zy/4 - D[4][l+ 3,l+-2] = sqrt(14)*(-xx**3*zy - 3*xx**2*xy*zx - 3*xx*xy**2*zy + 12*xx*xy*xz*zz + 6*xx*xz**2*zy + 3*xx*yx**2*zy + 6*xx*yx*yy*zx + 3*xx*yy**2*zy - 12*xx*yy*yz*zz - 6*xx*yz**2*zy - xy**3*zx + 6*xy*xz**2*zx + 3*xy*yx**2*zx + 6*xy*yx*yy*zy - 12*xy*yx*yz*zz + 3*xy*yy**2*zx - 6*xy*yz**2*zx - 12*xz*yx*yy*zz - 12*xz*yx*yz*zy - 12*xz*yy*yz*zx)/28 - D[4][l+ 3,l+-1] = sqrt(7)*(-3*xx**2*xy*zz - 3*xx**2*xz*zy - 6*xx*xy*xz*zx + 6*xx*yx*yy*zz + 6*xx*yx*yz*zy + 6*xx*yy*yz*zx - 3*xy**3*zz - 9*xy**2*xz*zy + 12*xy*xz**2*zz + 3*xy*yx**2*zz + 6*xy*yx*yz*zx + 9*xy*yy**2*zz + 18*xy*yy*yz*zy - 12*xy*yz**2*zz + 4*xz**3*zy + 3*xz*yx**2*zy + 6*xz*yx*yy*zx + 9*xz*yy**2*zy - 24*xz*yy*yz*zz - 12*xz*yz**2*zy)/28 - D[4][l+ 3,l+ 0] = sqrt(70)*(3*xx**3*zx + 3*xx**2*xy*zy - 12*xx**2*xz*zz + 3*xx*xy**2*zx - 12*xx*xz**2*zx - 9*xx*yx**2*zx - 6*xx*yx*yy*zy + 24*xx*yx*yz*zz - 3*xx*yy**2*zx + 12*xx*yz**2*zx + 3*xy**3*zy - 12*xy**2*xz*zz - 12*xy*xz**2*zy - 3*xy*yx**2*zy - 6*xy*yx*yy*zx - 9*xy*yy**2*zy + 24*xy*yy*yz*zz + 12*xy*yz**2*zy + 8*xz**3*zz + 12*xz*yx**2*zz + 24*xz*yx*yz*zx + 
12*xz*yy**2*zz + 24*xz*yy*yz*zy - 24*xz*yz**2*zz)/140 - D[4][l+ 3,l+ 1] = sqrt(7)*(-3*xx**3*zz - 9*xx**2*xz*zx - 3*xx*xy**2*zz - 6*xx*xy*xz*zy + 12*xx*xz**2*zz + 9*xx*yx**2*zz + 18*xx*yx*yz*zx + 3*xx*yy**2*zz + 6*xx*yy*yz*zy - 12*xx*yz**2*zz - 3*xy**2*xz*zx + 6*xy*yx*yy*zz + 6*xy*yx*yz*zy + 6*xy*yy*yz*zx + 4*xz**3*zx + 9*xz*yx**2*zx + 6*xz*yx*yy*zy - 24*xz*yx*yz*zz + 3*xz*yy**2*zx - 12*xz*yz**2*zx)/28 - D[4][l+ 3,l+ 2] = sqrt(14)*(-xx**3*zx + 3*xx**2*xz*zz + 3*xx*xz**2*zx + 3*xx*yx**2*zx - 6*xx*yx*yz*zz - 3*xx*yz**2*zx + xy**3*zy - 3*xy**2*xz*zz - 3*xy*xz**2*zy - 3*xy*yy**2*zy + 6*xy*yy*yz*zz + 3*xy*yz**2*zy - 3*xz*yx**2*zz - 6*xz*yx*yz*zx + 3*xz*yy**2*zz + 6*xz*yy*yz*zy)/14 - D[4][l+ 3,l+ 3] = xx**3*zz/4 + 3*xx**2*xz*zx/4 - 3*xx*xy**2*zz/4 - 3*xx*xy*xz*zy/2 - 3*xx*yx**2*zz/4 - 3*xx*yx*yz*zx/2 + 3*xx*yy**2*zz/4 + 3*xx*yy*yz*zy/2 - 3*xy**2*xz*zx/4 + 3*xy*yx*yy*zz/2 + 3*xy*yx*yz*zy/2 + 3*xy*yy*yz*zx/2 - 3*xz*yx**2*zx/4 + 3*xz*yx*yy*zy/2 + 3*xz*yy**2*zx/4 - D[4][l+ 3,l+ 4] = sqrt(2)*(xx**3*zx - 3*xx**2*xy*zy - 3*xx*xy**2*zx - 3*xx*yx**2*zx + 6*xx*yx*yy*zy + 3*xx*yy**2*zx + xy**3*zy + 3*xy*yx**2*zy + 6*xy*yx*yy*zx - 3*xy*yy**2*zy)/4 - D[4][l+ 4,l+-4] = xx**3*xy/2 - 3*xx**2*yx*yy/2 - xx*xy**3/2 - 3*xx*xy*yx**2/2 + 3*xx*xy*yy**2/2 + 3*xy**2*yx*yy/2 + yx**3*yy/2 - yx*yy**3/2 - D[4][l+ 4,l+-3] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*yy*yz - 6*xx*xy*yx*yz - 6*xx*xz*yx*yy - xy**3*xz + 3*xy**2*yy*yz - 3*xy*xz*yx**2 + 3*xy*xz*yy**2 + 3*yx**2*yy*yz - yy**3*yz)/4 - D[4][l+ 4,l+-2] = sqrt(7)*(-xx**3*xy + 3*xx**2*yx*yy - xx*xy**3 + 6*xx*xy*xz**2 + 3*xx*xy*yx**2 + 3*xx*xy*yy**2 - 6*xx*xy*yz**2 - 12*xx*xz*yy*yz + 3*xy**2*yx*yy - 12*xy*xz*yx*yz - 6*xz**2*yx*yy - yx**3*yy - yx*yy**3 + 6*yx*yy*yz**2)/14 - D[4][l+ 4,l+-1] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*yy*yz + 6*xx*xy*yx*yz + 6*xx*xz*yx*yy - 3*xy**3*xz + 9*xy**2*yy*yz + 4*xy*xz**3 + 3*xy*xz*yx**2 + 9*xy*xz*yy**2 - 12*xy*xz*yz**2 - 12*xz**2*yy*yz - 3*yx**2*yy*yz - 3*yy**3*yz + 4*yy*yz**3)/28 - D[4][l+ 4,l+ 0] = sqrt(35)*(3*xx**4 + 
6*xx**2*xy**2 - 24*xx**2*xz**2 - 18*xx**2*yx**2 - 6*xx**2*yy**2 + 24*xx**2*yz**2 - 24*xx*xy*yx*yy + 96*xx*xz*yx*yz + 3*xy**4 - 24*xy**2*xz**2 - 6*xy**2*yx**2 - 18*xy**2*yy**2 + 24*xy**2*yz**2 + 96*xy*xz*yy*yz + 8*xz**4 + 24*xz**2*yx**2 + 24*xz**2*yy**2 - 48*xz**2*yz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 + 3*yy**4 - 24*yy**2*yz**2 + 8*yz**4)/280 - D[4][l+ 4,l+ 1] = sqrt(14)*(-3*xx**3*xz + 9*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz + 4*xx*xz**3 + 9*xx*xz*yx**2 + 3*xx*xz*yy**2 - 12*xx*xz*yz**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy - 12*xz**2*yx*yz - 3*yx**3*yz - 3*yx*yy**2*yz + 4*yx*yz**3)/28 - D[4][l+ 4,l+ 2] = sqrt(7)*(-xx**4 + 6*xx**2*xz**2 + 6*xx**2*yx**2 - 6*xx**2*yz**2 - 24*xx*xz*yx*yz + xy**4 - 6*xy**2*xz**2 - 6*xy**2*yy**2 + 6*xy**2*yz**2 + 24*xy*xz*yy*yz - 6*xz**2*yx**2 + 6*xz**2*yy**2 - yx**4 + 6*yx**2*yz**2 + yy**4 - 6*yy**2*yz**2)/28 - D[4][l+ 4,l+ 3] = sqrt(2)*(xx**3*xz - 3*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz - 3*xx*xz*yx**2 + 3*xx*xz*yy**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy + yx**3*yz - 3*yx*yy**2*yz)/4 - D[4][l+ 4,l+ 4] = xx**4/8 - 3*xx**2*xy**2/4 - 3*xx**2*yx**2/4 + 3*xx**2*yy**2/4 + 3*xx*xy*yx*yy + xy**4/8 + 3*xy**2*yx**2/4 - 3*xy**2*yy**2/4 + yx**4/8 - 3*yx**2*yy**2/4 + yy**4/8 - - if lmax > 4: - raise NotImplementedError(f'Too a big {lmax=}') - - return D + """Generates real Wigner D-matrices for spatial rotation of spherical harmonics. + + Computes rotation matrices D^l for angular momenta l = 0 to lmax, where + D^l[m1, m2] transforms spherical harmonics under the specified rotation. + The rotation is defined by new axes x' = xyz[0], y' = xyz[1], z' = xyz[2]. + The code is generated by `mathutils/wigner.py`. + + Args: + xyz (numpy ndarray): 3x3 rotation matrix with rows defining new [x', y', z'] axes. + lmax (int): Maximum angular momentum (supports lmax <= 4). + order (str): Ordering convention for l=1 spherical harmonics. Defaults to 'xyz'. 
+ + Returns: + list: List of numpy ndarrays D[l] where D[l] is the (2l+1) x (2l+1) real Wigner + D-matrix for angular momentum l. Note: m1 index is rotated (D is transposed). + + Raises: + NotImplementedError: If lmax > 4. + + Note: + The matrices are computed using explicit algebraic expressions for each l. + """ + + xx = xyz[0,0]; xy = xyz[0,1]; xz = xyz[0,2] + yx = xyz[1,0]; yy = xyz[1,1]; yz = xyz[1,2] + zx = xyz[2,0]; zy = xyz[2,1]; zz = xyz[2,2] + + SQRT3 = sqrt(3.0) + + D = [np.zeros((2*l+1,2*l+1)) for l in range(lmax+1)] + + D[0][0,0] = 1.0 + + if lmax < 1: + return D + + l=1 + if order=='yzx': # -1 0 1 + D[1][l+ -1,l+ -1] = yy + D[1][l+ -1,l+ 0] = yz + D[1][l+ -1,l+ 1] = yx + D[1][l+ 0,l+ -1] = zy + D[1][l+ 0,l+ 0] = zz + D[1][l+ 0,l+ 1] = zx + D[1][l+ 1,l+ -1] = xy + D[1][l+ 1,l+ 0] = xz + D[1][l+ 1,l+ 1] = xx + elif order=='xyz': # 1 -1 0 + D[1][ 0, 0] = xx + D[1][ 0, 1] = xy + D[1][ 0, 2] = xz + D[1][ 1, 0] = yx + D[1][ 1, 1] = yy + D[1][ 1, 2] = yz + D[1][ 2, 0] = zx + D[1][ 2, 1] = zy + D[1][ 2, 2] = zz + + if lmax < 2: + return D + + l=2 + D[2][l+ -2,l+ -2] = xx*yy+xy*yx + D[2][l+ -2,l+ -1] = xy*yz+xz*yy + D[2][l+ -2,l+ 0] = xz*yz * SQRT3 + D[2][l+ -2,l+ 1] = xx*yz+xz*yx + D[2][l+ -2,l+ 2] = xx*yx-xy*yy + D[2][l+ -1,l+ -2] = yx*zy+yy*zx + D[2][l+ -1,l+ -1] = yy*zz+yz*zy + D[2][l+ -1,l+ 0] = yz*zz * SQRT3 + D[2][l+ -1,l+ 1] = yx*zz+yz*zx + D[2][l+ -1,l+ 2] = yx*zx-yy*zy + D[2][l+ 0,l+ -2] = zx*zy * SQRT3 + D[2][l+ 0,l+ -1] = zy*zz * SQRT3 + D[2][l+ 0,l+ 0] = 1.5*zz*zz - 0.5 + D[2][l+ 0,l+ 1] = zx*zz * SQRT3 + D[2][l+ 0,l+ 2] = (zx*zx-zy*zy) * 0.5 * SQRT3 + D[2][l+ 1,l+ -2] = xx*zy+xy*zx + D[2][l+ 1,l+ -1] = xy*zz+xz*zy + D[2][l+ 1,l+ 0] = xz*zz * SQRT3 + D[2][l+ 1,l+ 1] = xx*zz+xz*zx + D[2][l+ 1,l+ 2] = xx*zx-xy*zy + D[2][l+ 2,l+ -2] = xx*xy-yx*yy + D[2][l+ 2,l+ -1] = xy*xz-yy*yz + D[2][l+ 2,l+ 0] = (xz*xz-yz*yz) * 0.5 * SQRT3 + D[2][l+ 2,l+ 1] = xx*xz-yx*yz + D[2][l+ 2,l+ 2] = (xx*xx-xy*xy+yy*yy-yx*yx) * 0.5 + + if lmax < 3: + return D + + l=3 + 
D[3][l+ -3,l+ -3] = 3*xx**2*yy/4 + 3*xx*xy*yx/2 - 3*xy**2*yy/4 - 3*yx**2*yy/4 + yy**3/4 + D[3][l+ -3,l+ -2] = sqrt(6)*(xx*xy*yz + xx*xz*yy + xy*xz*yx - yx*yy*yz)/2 + D[3][l+ -3,l+ -1] = sqrt(15)*(-xx**2*yy - 2*xx*xy*yx - 3*xy**2*yy + 8*xy*xz*yz + 4*xz**2*yy + yx**2*yy + yy**3 - 4*yy*yz**2)/20 + D[3][l+ -3,l+ 0] = sqrt(10)*(-3*xx**2*yz - 6*xx*xz*yx - 3*xy**2*yz - 6*xy*xz*yy + 6*xz**2*yz + 3*yx**2*yz + 3*yy**2*yz - 2*yz**3)/20 + D[3][l+ -3,l+ 1] = sqrt(15)*(-3*xx**2*yx - 2*xx*xy*yy + 8*xx*xz*yz - xy**2*yx + 4*xz**2*yx + yx**3 + yx*yy**2 - 4*yx*yz**2)/20 + D[3][l+ -3,l+ 2] = sqrt(6)*(xx**2*yz + 2*xx*xz*yx - xy**2*yz - 2*xy*xz*yy - yx**2*yz + yy**2*yz)/4 + D[3][l+ -3,l+ 3] = 3*xx**2*yx/4 - 3*xx*xy*yy/2 - 3*xy**2*yx/4 - yx**3/4 + 3*yx*yy**2/4 + D[3][l+ -2,l+ -3] = sqrt(6)*(xx*yx*zy + xx*yy*zx + xy*yx*zx - xy*yy*zy)/2 + D[3][l+ -2,l+ -2] = xx*yy*zz + xx*yz*zy + xy*yx*zz + xy*yz*zx + xz*yx*zy + xz*yy*zx + D[3][l+ -2,l+ -1] = sqrt(10)*(-xx*yx*zy - xx*yy*zx - xy*yx*zx - 3*xy*yy*zy + 4*xy*yz*zz + 4*xz*yy*zz + 4*xz*yz*zy)/10 + D[3][l+ -2,l+ 0] = sqrt(15)*(-xx*yx*zz - xx*yz*zx - xy*yy*zz - xy*yz*zy - xz*yx*zx - xz*yy*zy + 2*xz*yz*zz)/5 + D[3][l+ -2,l+ 1] = sqrt(10)*(-3*xx*yx*zx - xx*yy*zy + 4*xx*yz*zz - xy*yx*zy - xy*yy*zx + 4*xz*yx*zz + 4*xz*yz*zx)/10 + D[3][l+ -2,l+ 2] = xx*yx*zz + xx*yz*zx - xy*yy*zz - xy*yz*zy + xz*yx*zx - xz*yy*zy + D[3][l+ -2,l+ 3] = sqrt(6)*(xx*yx*zx - xx*yy*zy - xy*yx*zy - xy*yy*zx)/2 + D[3][l+ -1,l+ -3] = sqrt(15)*(2*yx*zx*zy + yy*zx**2 - yy*zy**2)/4 + D[3][l+ -1,l+ -2] = sqrt(10)*(yx*zy*zz + yy*zx*zz + yz*zx*zy)/2 + D[3][l+ -1,l+ -1] = -yx*zx*zy/2 - yy*zx**2/4 - 3*yy*zy**2/4 + yy*zz**2 + 2*yz*zy*zz + D[3][l+ -1,l+ 0] = sqrt(6)*(-2*yx*zx*zz - 2*yy*zy*zz - yz*zx**2 - yz*zy**2 + 2*yz*zz**2)/4 + D[3][l+ -1,l+ 1] = -3*yx*zx**2/4 - yx*zy**2/4 + yx*zz**2 - yy*zx*zy/2 + 2*yz*zx*zz + D[3][l+ -1,l+ 2] = sqrt(10)*(2*yx*zx*zz - 2*yy*zy*zz + yz*zx**2 - yz*zy**2)/4 + D[3][l+ -1,l+ 3] = sqrt(15)*(yx*zx**2 - yx*zy**2 - 2*yy*zx*zy)/4 + D[3][l+ 0,l+ -3] = 
sqrt(10)*zy*(3*zx**2 - zy**2)/4 + D[3][l+ 0,l+ -2] = sqrt(15)*zx*zy*zz + D[3][l+ 0,l+ -1] = sqrt(6)*zy*(5*zz**2 - 1)/4 + D[3][l+ 0,l+ 0] = zz*(-3*zx**2 - 3*zy**2 + 2*zz**2)/2 + D[3][l+ 0,l+ 1] = sqrt(6)*zx*(5*zz**2 - 1)/4 + D[3][l+ 0,l+ 2] = sqrt(15)*zz*(zx - zy)*(zx + zy)/2 + D[3][l+ 0,l+ 3] = sqrt(10)*zx*(zx**2 - 3*zy**2)/4 + D[3][l+ 1,l+ -3] = sqrt(15)*(2*xx*zx*zy + xy*zx**2 - xy*zy**2)/4 + D[3][l+ 1,l+ -2] = sqrt(10)*(xx*zy*zz + xy*zx*zz + xz*zx*zy)/2 + D[3][l+ 1,l+ -1] = -xx*zx*zy/2 - xy*zx**2/4 - 3*xy*zy**2/4 + xy*zz**2 + 2*xz*zy*zz + D[3][l+ 1,l+ 0] = sqrt(6)*(-2*xx*zx*zz - 2*xy*zy*zz - xz*zx**2 - xz*zy**2 + 2*xz*zz**2)/4 + D[3][l+ 1,l+ 1] = -3*xx*zx**2/4 - xx*zy**2/4 + xx*zz**2 - xy*zx*zy/2 + 2*xz*zx*zz + D[3][l+ 1,l+ 2] = sqrt(10)*(2*xx*zx*zz - 2*xy*zy*zz + xz*zx**2 - xz*zy**2)/4 + D[3][l+ 1,l+ 3] = sqrt(15)*(xx*zx**2 - xx*zy**2 - 2*xy*zx*zy)/4 + D[3][l+ 2,l+ -3] = sqrt(6)*(xx**2*zy + 2*xx*xy*zx - xy**2*zy - yx**2*zy - 2*yx*yy*zx + yy**2*zy)/4 + D[3][l+ 2,l+ -2] = xx*xy*zz + xx*xz*zy + xy*xz*zx - yx*yy*zz - yx*yz*zy - yy*yz*zx + D[3][l+ 2,l+ -1] = sqrt(10)*(-xx**2*zy - 2*xx*xy*zx - 3*xy**2*zy + 8*xy*xz*zz + 4*xz**2*zy + yx**2*zy + 2*yx*yy*zx + 3*yy**2*zy - 8*yy*yz*zz - 4*yz**2*zy)/20 + D[3][l+ 2,l+ 0] = sqrt(15)*(-xx**2*zz - 2*xx*xz*zx - xy**2*zz - 2*xy*xz*zy + 2*xz**2*zz + yx**2*zz + 2*yx*yz*zx + yy**2*zz + 2*yy*yz*zy - 2*yz**2*zz)/10 + D[3][l+ 2,l+ 1] = sqrt(10)*(-3*xx**2*zx - 2*xx*xy*zy + 8*xx*xz*zz - xy**2*zx + 4*xz**2*zx + 3*yx**2*zx + 2*yx*yy*zy - 8*yx*yz*zz + yy**2*zx - 4*yz**2*zx)/20 + D[3][l+ 2,l+ 2] = xx**2*zz/2 + xx*xz*zx - xy**2*zz/2 - xy*xz*zy - yx**2*zz/2 - yx*yz*zx + yy**2*zz/2 + yy*yz*zy + D[3][l+ 2,l+ 3] = sqrt(6)*(xx**2*zx - 2*xx*xy*zy - xy**2*zx - yx**2*zx + 2*yx*yy*zy + yy**2*zx)/4 + D[3][l+ 3,l+ -3] = 3*xx**2*xy/4 - 3*xx*yx*yy/2 - xy**3/4 - 3*xy*yx**2/4 + 3*xy*yy**2/4 + D[3][l+ 3,l+ -2] = sqrt(6)*(xx*xy*xz - xx*yy*yz - xy*yx*yz - xz*yx*yy)/2 + D[3][l+ 3,l+ -1] = sqrt(15)*(-xx**2*xy + 2*xx*yx*yy - xy**3 + 4*xy*xz**2 + xy*yx**2 + 
3*xy*yy**2 - 4*xy*yz**2 - 8*xz*yy*yz)/20 + D[3][l+ 3,l+ 0] = sqrt(10)*(-3*xx**2*xz + 6*xx*yx*yz - 3*xy**2*xz + 6*xy*yy*yz + 2*xz**3 + 3*xz*yx**2 + 3*xz*yy**2 - 6*xz*yz**2)/20 + D[3][l+ 3,l+ 1] = sqrt(15)*(-xx**3 - xx*xy**2 + 4*xx*xz**2 + 3*xx*yx**2 + xx*yy**2 - 4*xx*yz**2 + 2*xy*yx*yy - 8*xz*yx*yz)/20 + D[3][l+ 3,l+ 2] = sqrt(6)*(xx**2*xz - 2*xx*yx*yz - xy**2*xz + 2*xy*yy*yz - xz*yx**2 + xz*yy**2)/4 + D[3][l+ 3,l+ 3] = xx**3/4 - 3*xx*xy**2/4 - 3*xx*yx**2/4 + 3*xx*yy**2/4 + 3*xy*yx*yy/2 + + if lmax < 4: + return D + + l=4 + D[4][l+ -4,l+-4] = xx**3*yy/2 + 3*xx**2*xy*yx/2 - 3*xx*xy**2*yy/2 - 3*xx*yx**2*yy/2 + xx*yy**3/2 - xy**3*yx/2 - xy*yx**3/2 + 3*xy*yx*yy**2/2 + D[4][l+ -4,l+-3] = sqrt(2)*(3*xx**2*xy*yz + 3*xx**2*xz*yy + 6*xx*xy*xz*yx - 6*xx*yx*yy*yz - xy**3*yz - 3*xy**2*xz*yy - 3*xy*yx**2*yz + 3*xy*yy**2*yz - 3*xz*yx**2*yy + xz*yy**3)/4 + D[4][l+ -4,l+-2] = sqrt(7)*(-xx**3*yy - 3*xx**2*xy*yx - 3*xx*xy**2*yy + 12*xx*xy*xz*yz + 6*xx*xz**2*yy + 3*xx*yx**2*yy + xx*yy**3 - 6*xx*yy*yz**2 - xy**3*yx + 6*xy*xz**2*yx + xy*yx**3 + 3*xy*yx*yy**2 - 6*xy*yx*yz**2 - 12*xz*yx*yy*yz)/14 + D[4][l+ -4,l+-1] = sqrt(14)*(-3*xx**2*xy*yz - 3*xx**2*xz*yy - 6*xx*xy*xz*yx + 6*xx*yx*yy*yz - 3*xy**3*yz - 9*xy**2*xz*yy + 12*xy*xz**2*yz + 3*xy*yx**2*yz + 9*xy*yy**2*yz - 4*xy*yz**3 + 4*xz**3*yy + 3*xz*yx**2*yy + 3*xz*yy**3 - 12*xz*yy*yz**2)/28 + D[4][l+ -4,l+ 0] = sqrt(35)*(3*xx**3*yx + 3*xx**2*xy*yy - 12*xx**2*xz*yz + 3*xx*xy**2*yx - 12*xx*xz**2*yx - 3*xx*yx**3 - 3*xx*yx*yy**2 + 12*xx*yx*yz**2 + 3*xy**3*yy - 12*xy**2*xz*yz - 12*xy*xz**2*yy - 3*xy*yx**2*yy - 3*xy*yy**3 + 12*xy*yy*yz**2 + 8*xz**3*yz + 12*xz*yx**2*yz + 12*xz*yy**2*yz - 8*xz*yz**3)/70 + D[4][l+ -4,l+ 1] = sqrt(14)*(-3*xx**3*yz - 9*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy + 12*xx*xz**2*yz + 9*xx*yx**2*yz + 3*xx*yy**2*yz - 4*xx*yz**3 - 3*xy**2*xz*yx + 6*xy*yx*yy*yz + 4*xz**3*yx + 3*xz*yx**3 + 3*xz*yx*yy**2 - 12*xz*yx*yz**2)/28 + D[4][l+ -4,l+ 2] = sqrt(7)*(-xx**3*yx + 3*xx**2*xz*yz + 3*xx*xz**2*yx + xx*yx**3 - 3*xx*yx*yz**2 + 
xy**3*yy - 3*xy**2*xz*yz - 3*xy*xz**2*yy - xy*yy**3 + 3*xy*yy*yz**2 - 3*xz*yx**2*yz + 3*xz*yy**2*yz)/7 + D[4][l+ -4,l+ 3] = sqrt(2)*(xx**3*yz + 3*xx**2*xz*yx - 3*xx*xy**2*yz - 6*xx*xy*xz*yy - 3*xx*yx**2*yz + 3*xx*yy**2*yz - 3*xy**2*xz*yx + 6*xy*yx*yy*yz - xz*yx**3 + 3*xz*yx*yy**2)/4 + D[4][l+ -4,l+ 4] = xx**3*yx/2 - 3*xx**2*xy*yy/2 - 3*xx*xy**2*yx/2 - xx*yx**3/2 + 3*xx*yx*yy**2/2 + xy**3*yy/2 + 3*xy*yx**2*yy/2 - xy*yy**3/2 + D[4][l+ -3,l+-4] = sqrt(2)*(3*xx**2*yx*zy + 3*xx**2*yy*zx + 6*xx*xy*yx*zx - 6*xx*xy*yy*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx - yx**3*zy - 3*yx**2*yy*zx + 3*yx*yy**2*zy + yy**3*zx)/4 + D[4][l+ -3,l+-3] = 3*xx**2*yy*zz/4 + 3*xx**2*yz*zy/4 + 3*xx*xy*yx*zz/2 + 3*xx*xy*yz*zx/2 + 3*xx*xz*yx*zy/2 + 3*xx*xz*yy*zx/2 - 3*xy**2*yy*zz/4 - 3*xy**2*yz*zy/4 + 3*xy*xz*yx*zx/2 - 3*xy*xz*yy*zy/2 - 3*yx**2*yy*zz/4 - 3*yx**2*yz*zy/4 - 3*yx*yy*yz*zx/2 + yy**3*zz/4 + 3*yy**2*yz*zy/4 + D[4][l+ -3,l+-2] = sqrt(14)*(-3*xx**2*yx*zy - 3*xx**2*yy*zx - 6*xx*xy*yx*zx - 6*xx*xy*yy*zy + 12*xx*xy*yz*zz + 12*xx*xz*yy*zz + 12*xx*xz*yz*zy - 3*xy**2*yx*zy - 3*xy**2*yy*zx + 12*xy*xz*yx*zz + 12*xy*xz*yz*zx + 6*xz**2*yx*zy + 6*xz**2*yy*zx + yx**3*zy + 3*yx**2*yy*zx + 3*yx*yy**2*zy - 12*yx*yy*yz*zz - 6*yx*yz**2*zy + yy**3*zx - 6*yy*yz**2*zx)/28 + D[4][l+ -3,l+-1] = sqrt(7)*(-3*xx**2*yy*zz - 3*xx**2*yz*zy - 6*xx*xy*yx*zz - 6*xx*xy*yz*zx - 6*xx*xz*yx*zy - 6*xx*xz*yy*zx - 9*xy**2*yy*zz - 9*xy**2*yz*zy - 6*xy*xz*yx*zx - 18*xy*xz*yy*zy + 24*xy*xz*yz*zz + 12*xz**2*yy*zz + 12*xz**2*yz*zy + 3*yx**2*yy*zz + 3*yx**2*yz*zy + 6*yx*yy*yz*zx + 3*yy**3*zz + 9*yy**2*yz*zy - 12*yy*yz**2*zz - 4*yz**3*zy)/28 + D[4][l+ -3,l+ 0] = sqrt(70)*(9*xx**2*yx*zx + 3*xx**2*yy*zy - 12*xx**2*yz*zz + 6*xx*xy*yx*zy + 6*xx*xy*yy*zx - 24*xx*xz*yx*zz - 24*xx*xz*yz*zx + 3*xy**2*yx*zx + 9*xy**2*yy*zy - 12*xy**2*yz*zz - 24*xy*xz*yy*zz - 24*xy*xz*yz*zy - 12*xz**2*yx*zx - 12*xz**2*yy*zy + 24*xz**2*yz*zz - 3*yx**3*zx - 3*yx**2*yy*zy + 12*yx**2*yz*zz - 3*yx*yy**2*zx + 12*yx*yz**2*zx - 3*yy**3*zy + 12*yy**2*yz*zz + 12*yy*yz**2*zy - 
8*yz**3*zz)/140 + D[4][l+ -3,l+ 1] = sqrt(7)*(-9*xx**2*yx*zz - 9*xx**2*yz*zx - 6*xx*xy*yy*zz - 6*xx*xy*yz*zy - 18*xx*xz*yx*zx - 6*xx*xz*yy*zy + 24*xx*xz*yz*zz - 3*xy**2*yx*zz - 3*xy**2*yz*zx - 6*xy*xz*yx*zy - 6*xy*xz*yy*zx + 12*xz**2*yx*zz + 12*xz**2*yz*zx + 3*yx**3*zz + 9*yx**2*yz*zx + 3*yx*yy**2*zz + 6*yx*yy*yz*zy - 12*yx*yz**2*zz + 3*yy**2*yz*zx - 4*yz**3*zx)/28 + D[4][l+ -3,l+ 2] = sqrt(14)*(-3*xx**2*yx*zx + 3*xx**2*yz*zz + 6*xx*xz*yx*zz + 6*xx*xz*yz*zx + 3*xy**2*yy*zy - 3*xy**2*yz*zz - 6*xy*xz*yy*zz - 6*xy*xz*yz*zy + 3*xz**2*yx*zx - 3*xz**2*yy*zy + yx**3*zx - 3*yx**2*yz*zz - 3*yx*yz**2*zx - yy**3*zy + 3*yy**2*yz*zz + 3*yy*yz**2*zy)/14 + D[4][l+ -3,l+ 3] = 3*xx**2*yx*zz/4 + 3*xx**2*yz*zx/4 - 3*xx*xy*yy*zz/2 - 3*xx*xy*yz*zy/2 + 3*xx*xz*yx*zx/2 - 3*xx*xz*yy*zy/2 - 3*xy**2*yx*zz/4 - 3*xy**2*yz*zx/4 - 3*xy*xz*yx*zy/2 - 3*xy*xz*yy*zx/2 - yx**3*zz/4 - 3*yx**2*yz*zx/4 + 3*yx*yy**2*zz/4 + 3*yx*yy*yz*zy/2 + 3*yy**2*yz*zx/4 + D[4][l+ -3,l+ 4] = sqrt(2)*(3*xx**2*yx*zx - 3*xx**2*yy*zy - 6*xx*xy*yx*zy - 6*xx*xy*yy*zx - 3*xy**2*yx*zx + 3*xy**2*yy*zy - yx**3*zx + 3*yx**2*yy*zy + 3*yx*yy**2*zx - yy**3*zy)/4 + D[4][l+ -2,l+-4] = sqrt(7)*(2*xx*yx*zx*zy + xx*yy*zx**2 - xx*yy*zy**2 + xy*yx*zx**2 - xy*yx*zy**2 - 2*xy*yy*zx*zy)/2 + D[4][l+ -2,l+-3] = sqrt(14)*(2*xx*yx*zy*zz + 2*xx*yy*zx*zz + 2*xx*yz*zx*zy + 2*xy*yx*zx*zz - 2*xy*yy*zy*zz + xy*yz*zx**2 - xy*yz*zy**2 + 2*xz*yx*zx*zy + xz*yy*zx**2 - xz*yy*zy**2)/4 + D[4][l+ -2,l+-2] = -xx*yx*zx*zy - xx*yy*zx**2/2 - xx*yy*zy**2/2 + xx*yy*zz**2 + 2*xx*yz*zy*zz - xy*yx*zx**2/2 - xy*yx*zy**2/2 + xy*yx*zz**2 - xy*yy*zx*zy + 2*xy*yz*zx*zz + 2*xz*yx*zy*zz + 2*xz*yy*zx*zz + 2*xz*yz*zx*zy + D[4][l+ -2,l+-1] = sqrt(2)*(-2*xx*yx*zy*zz - 2*xx*yy*zx*zz - 2*xx*yz*zx*zy - 2*xy*yx*zx*zz - 6*xy*yy*zy*zz - xy*yz*zx**2 - 3*xy*yz*zy**2 + 4*xy*yz*zz**2 - 2*xz*yx*zx*zy - xz*yy*zx**2 - 3*xz*yy*zy**2 + 4*xz*yy*zz**2 + 8*xz*yz*zy*zz)/4 + D[4][l+ -2,l+ 0] = sqrt(5)*(3*xx*yx*zx**2 + xx*yx*zy**2 - 4*xx*yx*zz**2 + 2*xx*yy*zx*zy - 8*xx*yz*zx*zz + 2*xy*yx*zx*zy + 
xy*yy*zx**2 + 3*xy*yy*zy**2 - 4*xy*yy*zz**2 - 8*xy*yz*zy*zz - 8*xz*yx*zx*zz - 8*xz*yy*zy*zz - 4*xz*yz*zx**2 - 4*xz*yz*zy**2 + 8*xz*yz*zz**2)/10 + D[4][l+ -2,l+ 1] = sqrt(2)*(-6*xx*yx*zx*zz - 2*xx*yy*zy*zz - 3*xx*yz*zx**2 - xx*yz*zy**2 + 4*xx*yz*zz**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy - 3*xz*yx*zx**2 - xz*yx*zy**2 + 4*xz*yx*zz**2 - 2*xz*yy*zx*zy + 8*xz*yz*zx*zz)/4 + D[4][l+ -2,l+ 2] = -xx*yx*zx**2 + xx*yx*zz**2 + 2*xx*yz*zx*zz + xy*yy*zy**2 - xy*yy*zz**2 - 2*xy*yz*zy*zz + 2*xz*yx*zx*zz - 2*xz*yy*zy*zz + xz*yz*zx**2 - xz*yz*zy**2 + D[4][l+ -2,l+ 3] = sqrt(14)*(2*xx*yx*zx*zz - 2*xx*yy*zy*zz + xx*yz*zx**2 - xx*yz*zy**2 - 2*xy*yx*zy*zz - 2*xy*yy*zx*zz - 2*xy*yz*zx*zy + xz*yx*zx**2 - xz*yx*zy**2 - 2*xz*yy*zx*zy)/4 + D[4][l+ -2,l+ 4] = sqrt(7)*(xx*yx*zx**2 - xx*yx*zy**2 - 2*xx*yy*zx*zy - 2*xy*yx*zx*zy - xy*yy*zx**2 + xy*yy*zy**2)/2 + D[4][l+ -1,l+-4] = sqrt(14)*(3*yx*zx**2*zy - yx*zy**3 + yy*zx**3 - 3*yy*zx*zy**2)/4 + D[4][l+ -1,l+-3] = sqrt(7)*(6*yx*zx*zy*zz + 3*yy*zx**2*zz - 3*yy*zy**2*zz + 3*yz*zx**2*zy - yz*zy**3)/4 + D[4][l+ -1,l+-2] = sqrt(2)*(-3*yx*zx**2*zy - yx*zy**3 + 6*yx*zy*zz**2 - yy*zx**3 - 3*yy*zx*zy**2 + 6*yy*zx*zz**2 + 12*yz*zx*zy*zz)/4 + D[4][l+ -1,l+-1] = -3*yx*zx*zy*zz/2 - 3*yy*zx**2*zz/4 - 9*yy*zy**2*zz/4 + yy*zz**3 - 3*yz*zx**2*zy/4 - 3*yz*zy**3/4 + 3*yz*zy*zz**2 + D[4][l+ -1,l+ 0] = sqrt(10)*(3*yx*zx**3 + 3*yx*zx*zy**2 - 12*yx*zx*zz**2 + 3*yy*zx**2*zy + 3*yy*zy**3 - 12*yy*zy*zz**2 - 12*yz*zx**2*zz - 12*yz*zy**2*zz + 8*yz*zz**3)/20 + D[4][l+ -1,l+ 1] = -9*yx*zx**2*zz/4 - 3*yx*zy**2*zz/4 + yx*zz**3 - 3*yy*zx*zy*zz/2 - 3*yz*zx**3/4 - 3*yz*zx*zy**2/4 + 3*yz*zx*zz**2 + D[4][l+ -1,l+ 2] = sqrt(2)*(-yx*zx**3 + 3*yx*zx*zz**2 + yy*zy**3 - 3*yy*zy*zz**2 + 3*yz*zx**2*zz - 3*yz*zy**2*zz)/2 + D[4][l+ -1,l+ 3] = sqrt(7)*(3*yx*zx**2*zz - 3*yx*zy**2*zz - 6*yy*zx*zy*zz + yz*zx**3 - 3*yz*zx*zy**2)/4 + D[4][l+ -1,l+ 4] = sqrt(14)*(yx*zx**3 - 3*yx*zx*zy**2 - 3*yy*zx**2*zy + yy*zy**3)/4 + D[4][l+ 0,l+-4] = sqrt(35)*zx*zy*(zx - zy)*(zx + zy)/2 + D[4][l+ 
0,l+-3] = sqrt(70)*zy*zz*(3*zx**2 - zy**2)/4 + D[4][l+ 0,l+-2] = sqrt(5)*zx*zy*(7*zz**2 - 1)/2 + D[4][l+ 0,l+-1] = sqrt(10)*zy*zz*(-3*zx**2 - 3*zy**2 + 4*zz**2)/4 + D[4][l+ 0,l+ 0] = 3*zx**4/8 + 3*zx**2*zy**2/4 - 3*zx**2*zz**2 + 3*zy**4/8 - 3*zy**2*zz**2 + zz**4 + D[4][l+ 0,l+ 1] = sqrt(10)*zx*zz*(-3*zx**2 - 3*zy**2 + 4*zz**2)/4 + D[4][l+ 0,l+ 2] = sqrt(5)*(zx - zy)*(zx + zy)*(7*zz**2 - 1)/4 + D[4][l+ 0,l+ 3] = sqrt(70)*zx*zz*(zx**2 - 3*zy**2)/4 + D[4][l+ 0,l+ 4] = sqrt(35)*(zx**4 - 6*zx**2*zy**2 + zy**4)/8 + D[4][l+ 1,l+-4] = sqrt(14)*(3*xx*zx**2*zy - xx*zy**3 + xy*zx**3 - 3*xy*zx*zy**2)/4 + D[4][l+ 1,l+-3] = sqrt(7)*(6*xx*zx*zy*zz + 3*xy*zx**2*zz - 3*xy*zy**2*zz + 3*xz*zx**2*zy - xz*zy**3)/4 + D[4][l+ 1,l+-2] = sqrt(2)*(-3*xx*zx**2*zy - xx*zy**3 + 6*xx*zy*zz**2 - xy*zx**3 - 3*xy*zx*zy**2 + 6*xy*zx*zz**2 + 12*xz*zx*zy*zz)/4 + D[4][l+ 1,l+-1] = -3*xx*zx*zy*zz/2 - 3*xy*zx**2*zz/4 - 9*xy*zy**2*zz/4 + xy*zz**3 - 3*xz*zx**2*zy/4 - 3*xz*zy**3/4 + 3*xz*zy*zz**2 + D[4][l+ 1,l+ 0] = sqrt(10)*(3*xx*zx**3 + 3*xx*zx*zy**2 - 12*xx*zx*zz**2 + 3*xy*zx**2*zy + 3*xy*zy**3 - 12*xy*zy*zz**2 - 12*xz*zx**2*zz - 12*xz*zy**2*zz + 8*xz*zz**3)/20 + D[4][l+ 1,l+ 1] = -9*xx*zx**2*zz/4 - 3*xx*zy**2*zz/4 + xx*zz**3 - 3*xy*zx*zy*zz/2 - 3*xz*zx**3/4 - 3*xz*zx*zy**2/4 + 3*xz*zx*zz**2 + D[4][l+ 1,l+ 2] = sqrt(2)*(-xx*zx**3 + 3*xx*zx*zz**2 + xy*zy**3 - 3*xy*zy*zz**2 + 3*xz*zx**2*zz - 3*xz*zy**2*zz)/2 + D[4][l+ 1,l+ 3] = sqrt(7)*(3*xx*zx**2*zz - 3*xx*zy**2*zz - 6*xy*zx*zy*zz + xz*zx**3 - 3*xz*zx*zy**2)/4 + D[4][l+ 1,l+ 4] = sqrt(14)*(xx*zx**3 - 3*xx*zx*zy**2 - 3*xy*zx**2*zy + xy*zy**3)/4 + D[4][l+ 2,l+-4] = sqrt(7)*(-xx**3*xy + 3*xx**2*zx*zy + xx*xy**3 + 3*xx*xy*zx**2 - 3*xx*xy*zy**2 - 3*xy**2*zx*zy + yx**3*yy - 3*yx**2*zx*zy - yx*yy**3 - 3*yx*yy*zx**2 + 3*yx*yy*zy**2 + 3*yy**2*zx*zy)/7 + D[4][l+ 2,l+-3] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*zy*zz + 6*xx*xy*zx*zz + 6*xx*xz*zx*zy + xy**3*xz - 3*xy**2*zy*zz + 3*xy*xz*zx**2 - 3*xy*xz*zy**2 + 3*yx**2*yy*yz - 3*yx**2*zy*zz - 6*yx*yy*zx*zz - 6*yx*yz*zx*zy 
- yy**3*yz + 3*yy**2*zy*zz - 3*yy*yz*zx**2 + 3*yy*yz*zy**2)/14 + D[4][l+ 2,l+-2] = xx**3*xy/7 - 3*xx**2*zx*zy/7 + xx*xy**3/7 - 6*xx*xy*xz**2/7 - 3*xx*xy*zx**2/7 - 3*xx*xy*zy**2/7 + 6*xx*xy*zz**2/7 + 12*xx*xz*zy*zz/7 - 3*xy**2*zx*zy/7 + 12*xy*xz*zx*zz/7 + 6*xz**2*zx*zy/7 - yx**3*yy/7 + 3*yx**2*zx*zy/7 - yx*yy**3/7 + 6*yx*yy*yz**2/7 + 3*yx*yy*zx**2/7 + 3*yx*yy*zy**2/7 - 6*yx*yy*zz**2/7 - 12*yx*yz*zy*zz/7 + 3*yy**2*zx*zy/7 - 12*yy*yz*zx*zz/7 - 6*yz**2*zx*zy/7 + D[4][l+ 2,l+-1] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*zy*zz - 6*xx*xy*zx*zz - 6*xx*xz*zx*zy + 3*xy**3*xz - 9*xy**2*zy*zz - 4*xy*xz**3 - 3*xy*xz*zx**2 - 9*xy*xz*zy**2 + 12*xy*xz*zz**2 + 12*xz**2*zy*zz - 3*yx**2*yy*yz + 3*yx**2*zy*zz + 6*yx*yy*zx*zz + 6*yx*yz*zx*zy - 3*yy**3*yz + 9*yy**2*zy*zz + 4*yy*yz**3 + 3*yy*yz*zx**2 + 9*yy*yz*zy**2 - 12*yy*yz*zz**2 - 12*yz**2*zy*zz)/14 + D[4][l+ 2,l+ 0] = sqrt(5)*(-3*xx**4 - 6*xx**2*xy**2 + 24*xx**2*xz**2 + 18*xx**2*zx**2 + 6*xx**2*zy**2 - 24*xx**2*zz**2 + 24*xx*xy*zx*zy - 96*xx*xz*zx*zz - 3*xy**4 + 24*xy**2*xz**2 + 6*xy**2*zx**2 + 18*xy**2*zy**2 - 24*xy**2*zz**2 - 96*xy*xz*zy*zz - 8*xz**4 - 24*xz**2*zx**2 - 24*xz**2*zy**2 + 48*xz**2*zz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 - 18*yx**2*zx**2 - 6*yx**2*zy**2 + 24*yx**2*zz**2 - 24*yx*yy*zx*zy + 96*yx*yz*zx*zz + 3*yy**4 - 24*yy**2*yz**2 - 6*yy**2*zx**2 - 18*yy**2*zy**2 + 24*yy**2*zz**2 + 96*yy*yz*zy*zz + 8*yz**4 + 24*yz**2*zx**2 + 24*yz**2*zy**2 - 48*yz**2*zz**2)/140 + D[4][l+ 2,l+ 1] = sqrt(2)*(3*xx**3*xz - 9*xx**2*zx*zz + 3*xx*xy**2*xz - 6*xx*xy*zy*zz - 4*xx*xz**3 - 9*xx*xz*zx**2 - 3*xx*xz*zy**2 + 12*xx*xz*zz**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + 12*xz**2*zx*zz - 3*yx**3*yz + 9*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz + 4*yx*yz**3 + 9*yx*yz*zx**2 + 3*yx*yz*zy**2 - 12*yx*yz*zz**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy - 12*yz**2*zx*zz)/14 + D[4][l+ 2,l+ 2] = xx**4/14 - 3*xx**2*xz**2/7 - 3*xx**2*zx**2/7 + 3*xx**2*zz**2/7 + 12*xx*xz*zx*zz/7 - xy**4/14 + 3*xy**2*xz**2/7 + 3*xy**2*zy**2/7 - 3*xy**2*zz**2/7 - 12*xy*xz*zy*zz/7 + 
3*xz**2*zx**2/7 - 3*xz**2*zy**2/7 - yx**4/14 + 3*yx**2*yz**2/7 + 3*yx**2*zx**2/7 - 3*yx**2*zz**2/7 - 12*yx*yz*zx*zz/7 + yy**4/14 - 3*yy**2*yz**2/7 - 3*yy**2*zy**2/7 + 3*yy**2*zz**2/7 + 12*yy*yz*zy*zz/7 - 3*yz**2*zx**2/7 + 3*yz**2*zy**2/7 + D[4][l+ 2,l+ 3] = sqrt(14)*(-xx**3*xz + 3*xx**2*zx*zz + 3*xx*xy**2*xz - 6*xx*xy*zy*zz + 3*xx*xz*zx**2 - 3*xx*xz*zy**2 - 3*xy**2*zx*zz - 6*xy*xz*zx*zy + yx**3*yz - 3*yx**2*zx*zz - 3*yx*yy**2*yz + 6*yx*yy*zy*zz - 3*yx*yz*zx**2 + 3*yx*yz*zy**2 + 3*yy**2*zx*zz + 6*yy*yz*zx*zy)/14 + D[4][l+ 2,l+ 4] = sqrt(7)*(-xx**4 + 6*xx**2*xy**2 + 6*xx**2*zx**2 - 6*xx**2*zy**2 - 24*xx*xy*zx*zy - xy**4 - 6*xy**2*zx**2 + 6*xy**2*zy**2 + yx**4 - 6*yx**2*yy**2 - 6*yx**2*zx**2 + 6*yx**2*zy**2 + 24*yx*yy*zx*zy + yy**4 + 6*yy**2*zx**2 - 6*yy**2*zy**2)/28 + D[4][l+ 3,l+-4] = sqrt(2)*(xx**3*zy + 3*xx**2*xy*zx - 3*xx*xy**2*zy - 3*xx*yx**2*zy - 6*xx*yx*yy*zx + 3*xx*yy**2*zy - xy**3*zx - 3*xy*yx**2*zx + 6*xy*yx*yy*zy + 3*xy*yy**2*zx)/4 + D[4][l+ 3,l+-3] = 3*xx**2*xy*zz/4 + 3*xx**2*xz*zy/4 + 3*xx*xy*xz*zx/2 - 3*xx*yx*yy*zz/2 - 3*xx*yx*yz*zy/2 - 3*xx*yy*yz*zx/2 - xy**3*zz/4 - 3*xy**2*xz*zy/4 - 3*xy*yx**2*zz/4 - 3*xy*yx*yz*zx/2 + 3*xy*yy**2*zz/4 + 3*xy*yy*yz*zy/2 - 3*xz*yx**2*zy/4 - 3*xz*yx*yy*zx/2 + 3*xz*yy**2*zy/4 + D[4][l+ 3,l+-2] = sqrt(14)*(-xx**3*zy - 3*xx**2*xy*zx - 3*xx*xy**2*zy + 12*xx*xy*xz*zz + 6*xx*xz**2*zy + 3*xx*yx**2*zy + 6*xx*yx*yy*zx + 3*xx*yy**2*zy - 12*xx*yy*yz*zz - 6*xx*yz**2*zy - xy**3*zx + 6*xy*xz**2*zx + 3*xy*yx**2*zx + 6*xy*yx*yy*zy - 12*xy*yx*yz*zz + 3*xy*yy**2*zx - 6*xy*yz**2*zx - 12*xz*yx*yy*zz - 12*xz*yx*yz*zy - 12*xz*yy*yz*zx)/28 + D[4][l+ 3,l+-1] = sqrt(7)*(-3*xx**2*xy*zz - 3*xx**2*xz*zy - 6*xx*xy*xz*zx + 6*xx*yx*yy*zz + 6*xx*yx*yz*zy + 6*xx*yy*yz*zx - 3*xy**3*zz - 9*xy**2*xz*zy + 12*xy*xz**2*zz + 3*xy*yx**2*zz + 6*xy*yx*yz*zx + 9*xy*yy**2*zz + 18*xy*yy*yz*zy - 12*xy*yz**2*zz + 4*xz**3*zy + 3*xz*yx**2*zy + 6*xz*yx*yy*zx + 9*xz*yy**2*zy - 24*xz*yy*yz*zz - 12*xz*yz**2*zy)/28 + D[4][l+ 3,l+ 0] = sqrt(70)*(3*xx**3*zx + 3*xx**2*xy*zy - 
12*xx**2*xz*zz + 3*xx*xy**2*zx - 12*xx*xz**2*zx - 9*xx*yx**2*zx - 6*xx*yx*yy*zy + 24*xx*yx*yz*zz - 3*xx*yy**2*zx + 12*xx*yz**2*zx + 3*xy**3*zy - 12*xy**2*xz*zz - 12*xy*xz**2*zy - 3*xy*yx**2*zy - 6*xy*yx*yy*zx - 9*xy*yy**2*zy + 24*xy*yy*yz*zz + 12*xy*yz**2*zy + 8*xz**3*zz + 12*xz*yx**2*zz + 24*xz*yx*yz*zx + 12*xz*yy**2*zz + 24*xz*yy*yz*zy - 24*xz*yz**2*zz)/140 + D[4][l+ 3,l+ 1] = sqrt(7)*(-3*xx**3*zz - 9*xx**2*xz*zx - 3*xx*xy**2*zz - 6*xx*xy*xz*zy + 12*xx*xz**2*zz + 9*xx*yx**2*zz + 18*xx*yx*yz*zx + 3*xx*yy**2*zz + 6*xx*yy*yz*zy - 12*xx*yz**2*zz - 3*xy**2*xz*zx + 6*xy*yx*yy*zz + 6*xy*yx*yz*zy + 6*xy*yy*yz*zx + 4*xz**3*zx + 9*xz*yx**2*zx + 6*xz*yx*yy*zy - 24*xz*yx*yz*zz + 3*xz*yy**2*zx - 12*xz*yz**2*zx)/28 + D[4][l+ 3,l+ 2] = sqrt(14)*(-xx**3*zx + 3*xx**2*xz*zz + 3*xx*xz**2*zx + 3*xx*yx**2*zx - 6*xx*yx*yz*zz - 3*xx*yz**2*zx + xy**3*zy - 3*xy**2*xz*zz - 3*xy*xz**2*zy - 3*xy*yy**2*zy + 6*xy*yy*yz*zz + 3*xy*yz**2*zy - 3*xz*yx**2*zz - 6*xz*yx*yz*zx + 3*xz*yy**2*zz + 6*xz*yy*yz*zy)/14 + D[4][l+ 3,l+ 3] = xx**3*zz/4 + 3*xx**2*xz*zx/4 - 3*xx*xy**2*zz/4 - 3*xx*xy*xz*zy/2 - 3*xx*yx**2*zz/4 - 3*xx*yx*yz*zx/2 + 3*xx*yy**2*zz/4 + 3*xx*yy*yz*zy/2 - 3*xy**2*xz*zx/4 + 3*xy*yx*yy*zz/2 + 3*xy*yx*yz*zy/2 + 3*xy*yy*yz*zx/2 - 3*xz*yx**2*zx/4 + 3*xz*yx*yy*zy/2 + 3*xz*yy**2*zx/4 + D[4][l+ 3,l+ 4] = sqrt(2)*(xx**3*zx - 3*xx**2*xy*zy - 3*xx*xy**2*zx - 3*xx*yx**2*zx + 6*xx*yx*yy*zy + 3*xx*yy**2*zx + xy**3*zy + 3*xy*yx**2*zy + 6*xy*yx*yy*zx - 3*xy*yy**2*zy)/4 + D[4][l+ 4,l+-4] = xx**3*xy/2 - 3*xx**2*yx*yy/2 - xx*xy**3/2 - 3*xx*xy*yx**2/2 + 3*xx*xy*yy**2/2 + 3*xy**2*yx*yy/2 + yx**3*yy/2 - yx*yy**3/2 + D[4][l+ 4,l+-3] = sqrt(2)*(3*xx**2*xy*xz - 3*xx**2*yy*yz - 6*xx*xy*yx*yz - 6*xx*xz*yx*yy - xy**3*xz + 3*xy**2*yy*yz - 3*xy*xz*yx**2 + 3*xy*xz*yy**2 + 3*yx**2*yy*yz - yy**3*yz)/4 + D[4][l+ 4,l+-2] = sqrt(7)*(-xx**3*xy + 3*xx**2*yx*yy - xx*xy**3 + 6*xx*xy*xz**2 + 3*xx*xy*yx**2 + 3*xx*xy*yy**2 - 6*xx*xy*yz**2 - 12*xx*xz*yy*yz + 3*xy**2*yx*yy - 12*xy*xz*yx*yz - 6*xz**2*yx*yy - yx**3*yy - yx*yy**3 + 
6*yx*yy*yz**2)/14 + D[4][l+ 4,l+-1] = sqrt(14)*(-3*xx**2*xy*xz + 3*xx**2*yy*yz + 6*xx*xy*yx*yz + 6*xx*xz*yx*yy - 3*xy**3*xz + 9*xy**2*yy*yz + 4*xy*xz**3 + 3*xy*xz*yx**2 + 9*xy*xz*yy**2 - 12*xy*xz*yz**2 - 12*xz**2*yy*yz - 3*yx**2*yy*yz - 3*yy**3*yz + 4*yy*yz**3)/28 + D[4][l+ 4,l+ 0] = sqrt(35)*(3*xx**4 + 6*xx**2*xy**2 - 24*xx**2*xz**2 - 18*xx**2*yx**2 - 6*xx**2*yy**2 + 24*xx**2*yz**2 - 24*xx*xy*yx*yy + 96*xx*xz*yx*yz + 3*xy**4 - 24*xy**2*xz**2 - 6*xy**2*yx**2 - 18*xy**2*yy**2 + 24*xy**2*yz**2 + 96*xy*xz*yy*yz + 8*xz**4 + 24*xz**2*yx**2 + 24*xz**2*yy**2 - 48*xz**2*yz**2 + 3*yx**4 + 6*yx**2*yy**2 - 24*yx**2*yz**2 + 3*yy**4 - 24*yy**2*yz**2 + 8*yz**4)/280 + D[4][l+ 4,l+ 1] = sqrt(14)*(-3*xx**3*xz + 9*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz + 4*xx*xz**3 + 9*xx*xz*yx**2 + 3*xx*xz*yy**2 - 12*xx*xz*yz**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy - 12*xz**2*yx*yz - 3*yx**3*yz - 3*yx*yy**2*yz + 4*yx*yz**3)/28 + D[4][l+ 4,l+ 2] = sqrt(7)*(-xx**4 + 6*xx**2*xz**2 + 6*xx**2*yx**2 - 6*xx**2*yz**2 - 24*xx*xz*yx*yz + xy**4 - 6*xy**2*xz**2 - 6*xy**2*yy**2 + 6*xy**2*yz**2 + 24*xy*xz*yy*yz - 6*xz**2*yx**2 + 6*xz**2*yy**2 - yx**4 + 6*yx**2*yz**2 + yy**4 - 6*yy**2*yz**2)/28 + D[4][l+ 4,l+ 3] = sqrt(2)*(xx**3*xz - 3*xx**2*yx*yz - 3*xx*xy**2*xz + 6*xx*xy*yy*yz - 3*xx*xz*yx**2 + 3*xx*xz*yy**2 + 3*xy**2*yx*yz + 6*xy*xz*yx*yy + yx**3*yz - 3*yx*yy**2*yz)/4 + D[4][l+ 4,l+ 4] = xx**4/8 - 3*xx**2*xy**2/4 - 3*xx**2*yx**2/4 + 3*xx**2*yy**2/4 + 3*xx*xy*yx*yy + xy**4/8 + 3*xy**2*yx**2/4 - 3*xy**2*yy**2/4 + yx**4/8 - 3*yx**2*yy**2/4 + yy**4/8 + + if lmax > 4: + raise NotImplementedError(f'Too a big {lmax=}') + + return D def Dmatrix_for_z(z, lmax, order='xyz'): """Generates Wigner D-matrices for rotation that aligns z-axis with given vector. - Convenience function that combines new_xy_axis() and Dmatrix() to compute + Wrapper function that combines new_xy_axis() and Dmatrix() to compute rotation matrices for a rotation defined only by the target z-direction. 
Args: z (numpy ndarray): 3D vector defining the target z-axis direction. lmax (int): Maximum angular momentum (supports lmax <= 4). - order (str): Ordering convention for spherical harmonics. Defaults to 'xyz'. + order (str): Ordering convention for l=1 spherical harmonics. Defaults to 'xyz'. Returns: - list: List of Wigner D-matrices for l = 0 to lmax. + list: List of Wigner D-matrices for l=0 to lmax. Raises: NotImplementedError: If lmax > 4. diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 147f26b4..71560b03 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -15,6 +15,11 @@ def spahm_a_b(rep_type, mols, dms, pairfile=None, dump_and_exit=False, same_basis=False, only_z=None): """Computes SPAHM(a) or SPAHM(b) representations for a set of molecules. + Reference: + K. R. Briling, Y. Calvino Alonso, A. Fabrizio, C. Corminboeuf, + "SPAHM(a,b): Encoding the density information from guess Hamiltonian in quantum machine learning representations", + J. Chem. Theory Comput. 20 1108–1117 (2024), doi:10.1021/acs.jctc.3c01040. + Args: rep_type (str): Representation type: 'atom' for SPAHM(a) or 'bond' for SPAHM(b). mols (list): List of pyscf Mole objects. 
From 2bf05bcb4a72ceeca17010cb3071fe1f3b4544e9 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Wed, 5 Nov 2025 11:02:14 +0100 Subject: [PATCH 12/23] More docs and formatting --- qstack/fields/decomposition.py | 42 ++++----- qstack/mathutils/xyz_integrals_float.py | 92 ++++++++++++++------ qstack/mathutils/xyz_integrals_sym.py | 95 +++++++++++++------- qstack/regression/kernel_utils.py | 52 +++++------ qstack/regression/local_kernels.py | 56 ++++++------ qstack/spahm/compute_spahm.py | 2 +- qstack/spahm/rho/lowdin.py | 110 ++++++++++++------------ qstack/spahm/rho/utils.py | 20 ++--- 8 files changed, 270 insertions(+), 199 deletions(-) diff --git a/qstack/fields/decomposition.py b/qstack/fields/decomposition.py index 61a7ef2c..cbfaf65d 100644 --- a/qstack/fields/decomposition.py +++ b/qstack/fields/decomposition.py @@ -106,27 +106,27 @@ def get_coeff(dm, eri2c, eri3c, slices=None): return c def _get_inv_metric(mol, metric, v): - """Computes the inverse metric applied to a vector. - - Args: - mol (pyscf Mole): pyscf Mole object. - metric (str or numpy ndarray): Metric type ('unit', 'overlap', 'coulomb') or a metric matrix. - v (numpy ndarray): Vector to apply the inverse metric to. - - Returns: - numpy ndarray: Result of applying the inverse metric to the input vector. - """ - if isinstance(metric, str): - metric = metric.lower() - if metric in ['u', 'unit', '1']: - return v - elif metric in ['s', 'overlap', 'ovlp']: - O = mol.intor('int1e_ovlp_sph') - elif metric in ['j', 'coulomb']: - O = mol.intor('int2c2e_sph') - else: - O = metric - return scipy.linalg.solve(O, v, assume_a='pos') + """Computes the inverse metric applied to a vector. + + Args: + mol (pyscf Mole): pyscf Mole object. + metric (str or numpy ndarray): Metric type ('unit', 'overlap', 'coulomb') or a metric matrix. + v (numpy ndarray): Vector to apply the inverse metric to. + + Returns: + numpy ndarray: Result of applying the inverse metric to the input vector. 
+ """ + if isinstance(metric, str): + metric = metric.lower() + if metric in ['u', 'unit', '1']: + return v + elif metric in ['s', 'overlap', 'ovlp']: + O = mol.intor('int1e_ovlp_sph') + elif metric in ['j', 'coulomb']: + O = mol.intor('int2c2e_sph') + else: + O = metric + return scipy.linalg.solve(O, v, assume_a='pos') def correct_N_atomic(mol, N, c0, metric='u'): diff --git a/qstack/mathutils/xyz_integrals_float.py b/qstack/mathutils/xyz_integrals_float.py index 95fae4a2..a57a2e22 100755 --- a/qstack/mathutils/xyz_integrals_float.py +++ b/qstack/mathutils/xyz_integrals_float.py @@ -2,36 +2,72 @@ import sys -def xyz(n,m,k): -# computes the integral of x^2k y^2n z^2m over a sphere - k,n,m = sorted([k,n,m], reverse=True) - # k>=n>=m - if n==0: - xyz = 2.0 * (1.0 - (2.0*k-1.0)/(2.0*k+1.0)) - else: - xyz = (2*k-1) * I23(n,m,k) - return xyz +def xyz(n, m, k): + """Computes the integral of x^2k y^2n z^2m over a unit sphere. + + Args: + n (int): Half of power of y. + m (int): Half of power of z. + k (int): Half of power of x. + + Note: + The argument order does not matter. + + Returns: + float: The value of the integral. + """ + + k,n,m = sorted([k,n,m], reverse=True) + if n==0: # both n and m are 0 + xyz = 2.0 * (1.0 - (2.0*k-1.0)/(2.0*k+1.0)) + else: + xyz = (2*k-1) * I23(n,m,k) + return xyz + def I23(n,m,k): - I23 = 0.0 - for l in range(n+m+2): - I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2.0*l+2.0*k-1.0) - I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) - for l in range(1, n+2): - I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) - return I23 - -def trinomial(k1,k2,k3): -# (k1+k2+k3)! / (k1! * k2! * k3!) - k1,k2,k3 = sorted([k1,k2,k3], reverse=True) - trinom = 1.0 - for k in range(1,k2+1): - trinom = trinom * (k+k1) / k - for k in range(1,k3+1): - trinom = trinom * (k+k1+k2) / k - return trinom + """Compute an auxiliary integral needed for the integral over the unit sphere. + + Args: + n (int) + m (int) + k (int) + + Returns: + float: The value of the integral. 
+ """ + + I23 = 0.0 + for l in range(n+m+2): + I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2.0*l+2.0*k-1.0) + I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) + for l in range(1, n+2): + I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) + return I23 + + +def trinomial(k1, k2, k3): + """Computes the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). + + Args: + k1 (int) + k2 (int) + k3 (int) + + Returns: + float: The value of the trinomial coefficient. + """ + + k1,k2,k3 = sorted([k1,k2,k3], reverse=True) + trinom = 1.0 + for k in range(1,k2+1): + trinom = trinom * (k+k1) / k + for k in range(1,k3+1): + trinom = trinom * (k+k1+k2) / k + return trinom + if __name__ == "__main__": - k,n,m = map(int, sys.argv[1:4]) - print(f"{xyz(k,n,m):.15f} π") + k,n,m = map(int, sys.argv[1:4]) + print(f"{xyz(k,n,m):.15f} π") diff --git a/qstack/mathutils/xyz_integrals_sym.py b/qstack/mathutils/xyz_integrals_sym.py index 0dd5381e..527c17b1 100755 --- a/qstack/mathutils/xyz_integrals_sym.py +++ b/qstack/mathutils/xyz_integrals_sym.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import sys - try: import sympy except ImportError: @@ -13,36 +12,72 @@ """) raise -def xyz(n,m,k): -# computes the integral of x^2k y^2n z^2m over a sphere - k,n,m = sorted([k,n,m], reverse=True) - # k>=n>=m - if n==0: - K = sympy.symbols('K') - xyz = (2 * (1 - (2*K-1)/(2*K+1))).subs(K,k) - else: - xyz = (2*k-1) * I23(n,m,k) - return xyz * sympy.pi + +def xyz(n, m, k): + """Computes the integral of x^2k y^2n z^2m over a unit sphere. + + Args: + n (int): Half of power of y. + m (int): Half of power of z. + k (int): Half of power of x. + + Note: + The argument order does not matter. + + Returns: + sympy.Expr: The value of the integral. 
+ """ + + k,n,m = sorted([k,n,m], reverse=True) + if n==0: # both n and m are 0 + K = sympy.symbols('K') + xyz = (2 * (1 - (2*K-1)/(2*K+1))).subs(K,k) + else: + xyz = (2*k-1) * I23(n,m,k) + return xyz * sympy.pi + def I23(n,m,k): - I23 = 0.0 - K = sympy.symbols('K') - for l in range(n+m+2): - I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2*l+2*K-1) - I23 = I23.subs(K,k) - I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) - for l in range(1, n+2): - I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) - return I23 - -def trinomial(k1,k2,k3): -# (k1+k2+k3)! / (k1! * k2! * k3!) - k1,k2,k3 = sorted([k1,k2,k3]) - trinom = sympy.FallingFactorial(k1+k2+k3, k3) / (sympy.factorial(k1)*sympy.factorial(k2)) - return trinom + """Computes an auxiliary integral needed for the integral over the unit sphere. -if __name__ == "__main__": - k,n,m = map(int, sys.argv[1:4]) - x = xyz(k,n,m) - print(f"{x:.15f} = {x}") + Args: + n (int) + m (int) + k (int) + + Returns: + sympy.Expr: The value of the integral. + """ + I23 = 0.0 + K = sympy.symbols('K') + for l in range(n+m+2): + I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2*l+2*K-1) + I23 = I23.subs(K,k) + I23 = I23 / ( (2*n+1) * 2**(2*n+2*m) ) + for l in range(1, n+2): + I23 = I23 * (2*n+3-2*l) / (2*m-1+2*l) + return I23 + + +def trinomial(k1, k2, k3): + """Computes the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). + + Args: + k1 (int) + k2 (int) + k3 (int) + + Returns: + sympy.Expr: The value of the trinomial coefficient. 
+ """ + + k1,k2,k3 = sorted([k1,k2,k3]) + trinom = sympy.FallingFactorial(k1+k2+k3, k3) / (sympy.factorial(k1)*sympy.factorial(k2)) + return trinom + + +if __name__ == "__main__": + k,n,m = map(int, sys.argv[1:4]) + x = xyz(k,n,m) + print(f"{x:.15f} = {x}") diff --git a/qstack/regression/kernel_utils.py b/qstack/regression/kernel_utils.py index 42ff4a72..84d0eff7 100644 --- a/qstack/regression/kernel_utils.py +++ b/qstack/regression/kernel_utils.py @@ -24,20 +24,20 @@ def __call__(self, _parser, namespace, values, _option_string=None): defaults = SimpleNamespace( - sigma=32.0, - eta=1e-5, - kernel='L', - gkernel=None, - gdict={'alpha':1.0, 'normalize':1, 'verbose':0}, - test_size=0.2, - n_rep=5, - splits=5, - train_size=[0.125, 0.25, 0.5, 0.75, 1.0], - etaarr=np.logspace(-10, 0, 5).tolist(), - sigmaarr=np.logspace(0,6, 13).tolist(), - sigmaarr_mult=np.logspace(0,2, 5).tolist(), - random_state=0, - ) + sigma=32.0, + eta=1e-5, + kernel='L', + gkernel=None, + gdict={'alpha':1.0, 'normalize':1, 'verbose':0}, + test_size=0.2, + n_rep=5, + splits=5, + train_size=[0.125, 0.25, 0.5, 0.75, 1.0], + etaarr=np.logspace(-10, 0, 5).tolist(), + sigmaarr=np.logspace(0,6, 13).tolist(), + sigmaarr_mult=np.logspace(0,2, 5).tolist(), + random_state=0, + ) def get_local_kernel(arg): @@ -89,22 +89,22 @@ def get_global_kernel(arg, local_kernel): def get_kernel(arg, arg2=None): - """Returns the appropriate kernel function based on arguments. + """Returns the appropriate kernel function based on arguments. - Args: - arg (str): Local kernel name. - arg2 (tuple, optional): If provided, tuple of (global_kernel_name, options) for global kernel. Defaults to None. + Args: + arg (str): Local kernel name. + arg2 (tuple, optional): If provided, tuple of (global_kernel_name, options) for global kernel. Defaults to None. - Returns: - callable: Kernel function (local or global). - """ + Returns: + callable: Kernel function (local or global). 
+ """ - local_kernel = get_local_kernel(arg) + local_kernel = get_local_kernel(arg) - if arg2 is None or arg2[0] is None: - return local_kernel - else: - return get_global_kernel(arg2, local_kernel) + if arg2 is None or arg2[0] is None: + return local_kernel + else: + return get_global_kernel(arg2, local_kernel) def train_test_split_idx(y, idx_test=None, idx_train=None, diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index d1419d25..eb3ffcf0 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -6,34 +6,34 @@ def custom_laplacian_kernel(X, Y, gamma): - """Computes Laplacian kernel between X and Y using Python implementation. - - K(x, y) = exp(-gamma * ||x - y||_1) - - Args: - X (numpy ndarray): First set of samples (can be multi-dimensional). - Y (numpy ndarray): Second set of samples. - gamma (float): Kernel width parameter. - - Returns: - numpy ndarray: Laplacian kernel matrix of shape (len(X), len(Y)). - - Raises: - RuntimeError: If X and Y have incompatible shapes. - """ - if X.shape[1:] != Y.shape[1:]: - raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") - def cdist(X, Y): - K = np.zeros((len(X),len(Y))) - for i,x in enumerate(X): - x = np.array([x] * len(Y)) - d = np.abs(x-Y) - d = np.sum(d, axis=tuple(range(1, len(d.shape)))) - K[i,:] = d - return K - K = -gamma * cdist(X, Y) - np.exp(K, out=K) - return K + """Computes Laplacian kernel between X and Y using Python implementation. + + K(x, y) = exp(-gamma * ||x - y||_1) + + Args: + X (numpy ndarray): First set of samples (can be multi-dimensional). + Y (numpy ndarray): Second set of samples. + gamma (float): Kernel width parameter. + + Returns: + numpy ndarray: Laplacian kernel matrix of shape (len(X), len(Y)). + + Raises: + RuntimeError: If X and Y have incompatible shapes. 
+ """ + if X.shape[1:] != Y.shape[1:]: + raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") + def cdist(X, Y): + K = np.zeros((len(X),len(Y))) + for i,x in enumerate(X): + x = np.array([x] * len(Y)) + d = np.abs(x-Y) + d = np.sum(d, axis=tuple(range(1, len(d.shape)))) + K[i,:] = d + return K + K = -gamma * cdist(X, Y) + np.exp(K, out=K) + return K def custom_C_kernels(kernel_function, return_distance_function=False): diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 469c6963..e36d4bf9 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -1,6 +1,6 @@ import numpy as np from pyscf import scf, grad -from .guesses import solveF, get_guess, get_occ, get_dm, eigenvalue_grad, get_guess_g +from .guesses import solveF, get_guess, get_occ, eigenvalue_grad, get_guess_g def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): diff --git a/qstack/spahm/rho/lowdin.py b/qstack/spahm/rho/lowdin.py index 6463d11e..76518de4 100644 --- a/qstack/spahm/rho/lowdin.py +++ b/qstack/spahm/rho/lowdin.py @@ -1,69 +1,69 @@ import numpy as np class Lowdin_split: - """Löwdin orthogonalization for density matrix partitioning. + """Löwdin orthogonalization for density matrix partitioning. - Transforms density matrix to orthogonal basis using symmetric orthogonalization, - enabling clean atomic and bond partitioning of electron density. + Transforms density matrix to orthogonal basis using symmetric orthogonalization, + enabling clean atomic and bond partitioning of electron density. - Attributes: - S (numpy ndarray): Overlap matrix in AO basis. - S12 (numpy ndarray): Square root of overlap matrix (S^{1/2}). - S12i (numpy ndarray): Inverse square root of overlap matrix (S^{-1/2}). - mol (pyscf Mole): pyscf Mole object. - dm (numpy ndarray): Original density matrix in AO basis. - dmL (numpy ndarray): Löwdin-orthogonalized density matrix. 
- """ - - def __init__(self, mol, dm): - """Initializes Löwdin split with molecule and density matrix. - - Args: + Attributes: + S (numpy ndarray): Overlap matrix in AO basis. + S12 (numpy ndarray): Square root of overlap matrix (S^{1/2}). + S12i (numpy ndarray): Inverse square root of overlap matrix (S^{-1/2}). mol (pyscf Mole): pyscf Mole object. - dm (numpy ndarray): Density matrix in AO basis. + dm (numpy ndarray): Original density matrix in AO basis. + dmL (numpy ndarray): Löwdin-orthogonalized density matrix. """ - S = mol.intor_symmetric('int1e_ovlp') - S12,S12i = self.sqrtm(S) - self.S = S - self.S12 = S12 - self.S12i = S12i - self.mol = mol - self.dm = dm - self.dmL = S12 @ dm @ S12 - def sqrtm(self, m): - """Computes matrix square root and inverse square root via eigendecomposition. + def __init__(self, mol, dm): + """Initializes Löwdin split with molecule and density matrix. - Args: - m (numpy ndarray): Symmetric positive-definite matrix. + Args: + mol (pyscf Mole): pyscf Mole object. + dm (numpy ndarray): Density matrix in AO basis. + """ + S = mol.intor_symmetric('int1e_ovlp') + S12,S12i = self.sqrtm(S) + self.S = S + self.S12 = S12 + self.S12i = S12i + self.mol = mol + self.dm = dm + self.dmL = S12 @ dm @ S12 - Returns: - tuple: (m^{1/2}, m^{-1/2}) both symmetrized. - """ - e,b = np.linalg.eigh(m) - e = np.sqrt(e) - sm = b @ np.diag(e ) @ b.T - sm1 = b @ np.diag(1.0/e) @ b.T - return (sm+sm.T)*0.5, (sm1+sm1.T)*0.5 + def sqrtm(self, m): + """Computes matrix square root and inverse square root via eigendecomposition. - def get_bond(self, at1idx, at2idx): - """Extracts bond density matrix for an atom pair. + Args: + m (numpy ndarray): Symmetric positive-definite matrix. - Isolates the density matrix components corresponding to interactions - between two atoms, transforming back to AO basis. + Returns: + tuple: (m^{1/2}, m^{-1/2}) both symmetrized. 
+ """ + e,b = np.linalg.eigh(m) + e = np.sqrt(e) + sm = b @ np.diag(e ) @ b.T + sm1 = b @ np.diag(1.0/e) @ b.T + return (sm+sm.T)*0.5, (sm1+sm1.T)*0.5 - Args: - at1idx (int): Index of first atom. - at2idx (int): Index of second atom. + def get_bond(self, at1idx, at2idx): + """Extracts bond density matrix for an atom pair. - Returns: - numpy ndarray: Bond density matrix in AO basis (2D array). - """ - mo1idx = range(*self.mol.aoslice_nr_by_atom()[at1idx][2:]) - mo2idx = range(*self.mol.aoslice_nr_by_atom()[at2idx][2:]) - ix1 = np.ix_(mo1idx,mo2idx) - ix2 = np.ix_(mo2idx,mo1idx) - dmL_bond = np.zeros_like(self.dmL) - dmL_bond[ix1] = self.dmL[ix1] - dmL_bond[ix2] = self.dmL[ix2] - return self.S12i @ dmL_bond @ self.S12i + Isolates the density matrix components corresponding to interactions + between two atoms, transforming back to AO basis. + + Args: + at1idx (int): Index of first atom. + at2idx (int): Index of second atom. + + Returns: + numpy ndarray: Bond density matrix in AO basis (2D array). 
+ """ + mo1idx = range(*self.mol.aoslice_nr_by_atom()[at1idx][2:]) + mo2idx = range(*self.mol.aoslice_nr_by_atom()[at2idx][2:]) + ix1 = np.ix_(mo1idx,mo2idx) + ix2 = np.ix_(mo2idx,mo1idx) + dmL_bond = np.zeros_like(self.dmL) + dmL_bond[ix1] = self.dmL[ix1] + dmL_bond[ix2] = self.dmL[ix2] + return self.S12i @ dmL_bond @ self.S12i diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index 4ec72d65..ca8dbcae 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -8,16 +8,16 @@ from qstack import compound defaults = SimpleNamespace( - guess='LB', - model='Lowdin-long-x', - basis='minao', - auxbasis='ccpvdzjkfit', - omod=['alpha', 'beta'], - elements=["H", "C", "N", "O", "S"], - cutoff=5.0, - xc='hf', - bpath=os.path.dirname(__file__)+'/basis_opt', - ) + guess='LB', + model='Lowdin-long-x', + basis='minao', + auxbasis='ccpvdzjkfit', + omod=['alpha', 'beta'], + elements=["H", "C", "N", "O", "S"], + cutoff=5.0, + xc='hf', + bpath=os.path.dirname(__file__)+'/basis_opt', + ) def get_chsp(fname, n): From b6c74d74f520f7a21cdb373dcdefb3aa5c78e7ca Mon Sep 17 00:00:00 2001 From: Ksenia Date: Wed, 5 Nov 2025 11:02:33 +0100 Subject: [PATCH 13/23] Refactor GWH and LB2020 guess --- qstack/spahm/LB2020guess.py | 937 ++++++++++++++++++------------------ qstack/spahm/guesses.py | 298 ++++++------ tests/test_spahm.py | 15 +- 3 files changed, 645 insertions(+), 605 deletions(-) diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index 4f8f1f5b..fcf1b82f 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -2,462 +2,485 @@ import numpy as np from pyscf import data, df, scf -""" Taken from https://github.com/briling/aepm and modified """ class LB2020guess: - - acfile_default = './parameters_HF.dat' - - def __init__(self, fname=None, parameters='HF'): - self.get_basis(fname, parameters) - - def renormalize(self, a): - """Computes renormalization factor for Gaussian basis functions. 
- - Args: - a (float): Gaussian exponent. - - Returns: - float: Renormalization factor (0.5*a/pi)^(3/4). - """ - # 1/norm1 = \int \exp(-a*r^2) d^3 r => norm1 = (a/pi)^(3/2) - # 1/norm2^2 = \int (\exp(-a*r^2))^2 d^3 r => norm2 = (2.0*a/pi)^(3/4) - # coefficient = norm1 / norm2 = (0.5*a/pi)^(3/4) - x = np.sqrt(np.sqrt(0.5*a/np.pi)) - return x*x*x - - def read_ac(self, fname): - """Reads auxiliary basis parameters from file. - - Args: - fname (str, optional): Path to parameter file. If None, uses default. - - Returns: - dict: Dictionary mapping element symbols to basis function parameters. - """ - if fname is None: - fname = self.acfile_default - with open(fname) as f: - lines = f.readlines() - basis = {'H': []} - il=0 - while il 0: - zrest = zcore - bad_idx = [] - for iprim in range(len(acbasis[q])): - if np.isclose(zrest, 0): - break - a, c = acbasis[q][iprim][1] - renorm = self.renormalize(a) - c /= renorm # convert back to charge units: sum {c} == charge(q) - dc = min(c, zrest) - if np.isclose(c, dc): - bad_idx.append(iprim) - else: - acbasis[q][iprim][1][1] = (c-dc)*renorm - zrest -= dc - for i in bad_idx[::-1]: - acbasis[q].pop(i) - return acbasis - - def get_auxweights(self, auxmol): - """Extracts auxiliary basis weights from auxiliary molecule object. - - Args: - auxmol (pyscf Mole): Auxiliary molecule object. - - Returns: - numpy ndarray: Array of auxiliary basis function weights. - """ - w = np.zeros(auxmol.nao) - iao = 0 - for iat in range(auxmol.natm): - q = auxmol._atom[iat][0] - for prim in auxmol._basis[q]: - w[iao] = prim[1][1] - iao+=1 - return w - - def merge_caps(self, w, eri3c): - """Contracts 3-center integrals with auxiliary basis weights. - - Args: - w (numpy ndarray): Auxiliary basis weights. - eri3c (numpy ndarray): 3-center electron repulsion integrals. - - Returns: - numpy ndarray: Contracted integrals. - """ - return np.einsum('...i,i->...', eri3c, w) - - def get_eri3c(self, mol, auxmol): - """Computes 3-center electron repulsion integrals. 
- - Args: - mol (pyscf Mole): Main molecule object. - auxmol (pyscf Mole): Auxiliary molecule object. - - Returns: - numpy ndarray: 3-center ERIs (ij|P) where i,j are AO indices and P is aux basis index. - """ - pmol = mol + auxmol - shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) - eri3c = pmol.intor('int3c2e_sph', shls_slice=shls_slice) - return eri3c - - def check_coefficients(self, mol, acbasis): - ch1 = sum(sum(c/self.renormalize(a) for _, (a, c) in acbasis[mol.atom_pure_symbol(iat)]) for iat in range(mol.natm)) - ch2 = sum(mol.atom_charges()) - (mol.charge if self.parameters == 'HF' else 0) - if not np.isclose(ch1, ch2): - raise RuntimeError("Coefficients corrupted after adding ECP") - - def HLB20(self, mol): - acbasis = self.use_charge(mol) - if mol.has_ecp(): - acbasis = self.use_ecp(mol, acbasis) - self.check_coefficients(mol, acbasis) - - auxmol = df.make_auxmol(mol, acbasis) - eri3c = self.get_eri3c(mol, auxmol) - auxw = self.get_auxweights(auxmol) - return self.merge_caps(auxw, eri3c) - - def Heff(self, mol): - self.mol = mol - self.Hcore = scf.hf.get_hcore(mol) - self.H = self.Hcore + self.HLB20(mol) - return self.H - - - def HLB20_ints_generator(self, mol, auxmol): - pmol = mol + auxmol - shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) - eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) - eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) - aoslices = mol.aoslice_by_atom()[:,2:] - auxaoslices = auxmol.aoslice_by_atom()[:,2:] - def HLB20_ints_deriv(iat): - p0, p1 = aoslices[iat] - P0, P1 = auxaoslices[iat] - eri3c2e_ip = np.zeros_like(eri3c2e_ip1) - eri3c2e_ip[:,p0:p1,:,:] += eri3c2e_ip1[:,p0:p1,:,:] - eri3c2e_ip[:,:,p0:p1,:] += eri3c2e_ip1[:,p0:p1,:,:].transpose((0,2,1,3)) - eri3c2e_ip[:,:,:,P0:P1] += eri3c2e_ip2[:,:,:,P0:P1] - return -eri3c2e_ip - return HLB20_ints_deriv - - def HLB20_generator(self, mol): - acbasis = self.use_charge(mol) - if 
mol.has_ecp(): - acbasis = self.use_ecp(mol, acbasis) - self.check_coefficients(mol, acbasis) - auxmol = df.make_auxmol(mol, acbasis) - eri3c = self.HLB20_ints_generator(mol, auxmol) - auxw = self.get_auxweights(auxmol) - def HLB20_deriv(iat): - return self.merge_caps(auxw, eri3c(iat)) - return HLB20_deriv + """See https://github.com/briling/aepm.""" + + def __init__(self, fname=None, parameters='HF'): + self.acfile_default = './parameters_HF.dat' + self.Qmax = 102 + self.init_data() + self.get_basis(fname, parameters) + + + def renormalize(self, a): + r"""Computes renormalization factor for Gaussian basis functions. + + The auxiliary basis functions are given in charge normalization, thus + we need to renormalize them to square-integral normalization for use in integrals. + + 1/norm1 = \int \exp(-a*r^2) d^3 r => norm1 = (a/pi)^(3/2) + 1/norm2^2 = \int (\exp(-a*r^2))^2 d^3 r => norm2 = (2.0*a/pi)^(3/4) + coefficient = norm1 / norm2 = (0.5*a/pi)^(3/4) + + Args: + a (float): Gaussian exponent. + + Returns: + float: Renormalization factor (0.5*a/pi)^(3/4). + """ + x = np.sqrt(np.sqrt(0.5*a/np.pi)) + return x*x*x + + + def read_ac(self, fname): + """Reads auxiliary basis parameters from file. + + Args: + fname (str, optional): Path to parameter file. If None, uses default. + + Returns: + dict: Dictionary mapping element symbols to basis function parameters. 
+ """ + if fname is None: + fname = self.acfile_default + with open(fname) as f: + lines = f.readlines() + basis = {'H': []} + il=0 + while il 0: + zrest = zcore + bad_idx = [] + for iprim in range(len(acbasis[q])): + if np.isclose(zrest, 0): + break + a, c = acbasis[q][iprim][1] + renorm = self.renormalize(a) + c /= renorm # convert back to charge units: sum {c} == charge(q) + dc = min(c, zrest) + if np.isclose(c, dc): + bad_idx.append(iprim) + else: + acbasis[q][iprim][1][1] = (c-dc)*renorm + zrest -= dc + for i in bad_idx[::-1]: + acbasis[q].pop(i) + return acbasis + + + def get_auxweights(self, auxmol): + """Extracts auxiliary basis weights from auxiliary molecule object. + + Args: + auxmol (pyscf Mole): Auxiliary molecule object. + + Returns: + numpy ndarray: Array of auxiliary basis function weights. + """ + w = np.zeros(auxmol.nao) + iao = 0 + for iat in range(auxmol.natm): + q = auxmol._atom[iat][0] + for prim in auxmol._basis[q]: + w[iao] = prim[1][1] + iao+=1 + return w + + + def merge_caps(self, w, eri3c): + """Contracts 3-center integrals with auxiliary basis weights. + + Args: + w (numpy ndarray): Auxiliary basis weights. + eri3c (numpy ndarray): 3-center electron repulsion integrals. + + Returns: + numpy ndarray: Contracted integrals. + """ + return np.einsum('...i,i->...', eri3c, w) + + + def get_eri3c(self, mol, auxmol): + """Computes 3-center electron repulsion integrals. + + Args: + mol (pyscf Mole): Main molecule object. + auxmol (pyscf Mole): Auxiliary molecule object. + + Returns: + numpy ndarray: 3-center ERIs (ij|P) where i,j are AO indices and P is aux basis index. 
+ """ + pmol = mol + auxmol + shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) + eri3c = pmol.intor('int3c2e_sph', shls_slice=shls_slice) + return eri3c + + + def check_coefficients(self, mol, acbasis): + ch1 = sum(sum(c/self.renormalize(a) for _, (a, c) in acbasis[mol.atom_pure_symbol(iat)]) for iat in range(mol.natm)) + ch2 = sum(mol.atom_charges()) - (mol.charge if self.parameters == 'HF' else 0) + if not np.isclose(ch1, ch2): + raise RuntimeError("Coefficients corrupted after adding ECP") + + + def HLB20(self, mol): + acbasis = self.use_charge(mol) + acbasis = self.use_ecp(mol, acbasis) + self.check_coefficients(mol, acbasis) + auxmol = df.make_auxmol(mol, acbasis) + eri3c = self.get_eri3c(mol, auxmol) + auxw = self.get_auxweights(auxmol) + return self.merge_caps(auxw, eri3c) + + + def Heff(self, mol): + self.mol = mol + self.Hcore = scf.hf.get_hcore(mol) + self.H = self.Hcore + self.HLB20(mol) + return self.H + + + def HLB20_ints_generator(self, mol, auxmol): + pmol = mol + auxmol + shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) + eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) + eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) + aoslices = mol.aoslice_by_atom()[:,2:] + auxaoslices = auxmol.aoslice_by_atom()[:,2:] + def HLB20_ints_deriv(iat): + p0, p1 = aoslices[iat] + P0, P1 = auxaoslices[iat] + eri3c2e_ip = np.zeros_like(eri3c2e_ip1) + eri3c2e_ip[:,p0:p1,:,:] += eri3c2e_ip1[:,p0:p1,:,:] + eri3c2e_ip[:,:,p0:p1,:] += eri3c2e_ip1[:,p0:p1,:,:].transpose((0,2,1,3)) + eri3c2e_ip[:,:,:,P0:P1] += eri3c2e_ip2[:,:,:,P0:P1] + return -eri3c2e_ip + return HLB20_ints_deriv + + + def HLB20_generator(self, mol): + acbasis = self.use_charge(mol) + acbasis = self.use_ecp(mol, acbasis) + self.check_coefficients(mol, acbasis) + auxmol = df.make_auxmol(mol, acbasis) + eri3c = self.HLB20_ints_generator(mol, auxmol) + auxw = self.get_auxweights(auxmol) + def 
HLB20_deriv(iat): + return self.merge_caps(auxw, eri3c(iat)) + return HLB20_deriv + + + def init_data(self): + self.caps_array = np.zeros(self.Qmax+1) + self.caps_array [ 1 : 2 +1] = 1.0 / 3.0 + self.caps_array [ 3 : 4 +1] = 1.0 / 16.0 + self.caps_array [ 5 : 10 +1] = 1.0 / 3.0 + self.caps_array [ 11 : 12 +1] = 1.0 / 32.0 + self.caps_array [ 13 : 18 +1] = 1.0 / 8.0 + self.caps_array [ 19 : 20 +1] = 1.0 / 32.0 + self.caps_array [ 21 : 30 +1] = 1.0 / 6.0 + self.caps_array [ 31 : 36 +1] = 1.0 / 12.0 + self.caps_array [ 37 : 38 +1] = 1.0 / 32.0 + self.caps_array [ 39 : 48 +1] = 1.0 / 8.0 + self.caps_array [ 49 : 54 +1] = 1.0 / 12.0 + self.caps_array [ 55 : 70 +1] = 1.0 / 32.0 + self.caps_array [ 71 : 86 +1] = 1.0 / 12.0 + self.caps_array [ 87 : 102 +1] = 1.0 / 32.0 + + self.hfs_basis = {'H': [[0, [0.0815877135278, 0.03846658840144482]]], + 'He': [[0, [0.808048051263, 0.42950970838920094]]], + 'Li': [[0, [2.60255347642, 0.9236581585938292]], [0, [0.0280604557276, 0.02092188631196157]]], + 'Be': [[0, [4.59692793038, 1.5671644720955082]], [0, [0.0804833286681, 0.07687177344753668]]], + 'B': [[0, [6.83323506001, 2.343454972959998]], [0, [0.128413097632, 0.15132206888434654]]], + 'C': [[0, [9.78271998209, 3.2338673789342076]], [0, [0.220436310973, 0.29830455285829904]]], + 'N': [[0, [13.0102305297, 4.234735126785875]], [0, [0.338162891505, 0.5080513541327736]]], + 'O': [[0, [16.0693906158, 5.282243530372744]], [0, [0.486361793604, 0.7907181038567846]]], + 'F': [[0, [19.101114431, 6.414979114451199]], [0, [0.654163546258, 1.1388759924473113]]], + 'Ne': [[0, [21.8775289055, 7.6030507281160205]], [0, [0.840940013903, 1.552754282665946]]], + 'Na': [[0, [39.3300572224, 8.771875563897146]], [0, [1.92102415925, 2.714794977091659]], [0, [0.07805961683, 0.0811487825176091]]], + 'Mg': [[0, [44.5119316877, 10.28939716988604]], [0, [2.2032684956, 3.187374601857695]], [0, [0.0887611981764, 0.1080039414860704]]], + 'Al': [[0, [48.8730920117, 11.90986953428574]], [0, [2.40497322587, 
3.637221913420624]], [0, [0.0734291195179, 0.10551672569775308]]], + 'Si': [[0, [57.6233652793, 13.49494028809413]], [0, [2.94976481323, 4.320831140260877]], [0, [0.105177215317, 0.17781870789022175]]], + 'P': [[0, [66.8662881023, 15.16878399053013]], [0, [3.53343161485, 5.031541131577296]], [0, [0.143855885176, 0.2753173411346297]]], + 'S': [[0, [77.3837213998, 16.887417337354126]], [0, [4.24286242552, 5.806114862351204]], [0, [0.19956804901, 0.42411499313349615]]], + 'Cl': [[0, [87.9791594478, 18.69938754757193]], [0, [4.96724533871, 6.585294351730177]], [0, [0.25861139087, 0.5996439927014025]]], + 'Ar': [[0, [98.6384890866, 20.607909307338506]], [0, [5.70477691943, 7.365867515604668]], [0, [0.322389303278, 0.8050942514147619]]], + 'K': [[0, [127.304718328, 21.99381564116015]], [0, [8.14542318935, 8.914152537873168]], [0, [0.599909721308, 1.281113936405088]], [0, [0.035109044485, 0.03885492933110718]]], + 'Ca': [[0, [143.410292528, 23.911082457093176]], [0, [9.37643144182, 9.941770494312644]], [0, [0.707265349098, 1.5157353597122245]], [0, [0.0458961284132, 0.06364796065031839]]], + 'Sc': [[0, [158.994572671, 25.904442179513133]], [0, [10.6669154617, 10.883238053673036]], [0, [0.855948344081, 1.8492123402861176]], [0, [0.0663836669817, 0.10346527800168384]]], + 'Ti': [[0, [174.894134206, 27.966990953941423]], [0, [11.9900959183, 11.826374216637854]], [0, [0.99956141908, 2.2420793288691643]], [0, [0.0831227117008, 0.13681731240170136]]], + 'V': [[0, [190.604934656, 30.120471923041325]], [0, [13.3014644743, 12.749780443487515]], [0, [1.14767573276, 2.6800134943163965]], [0, [0.0996286409699, 0.17038950363938712]]], + 'Cr': [[0, [206.066074361, 32.365170842752164]], [0, [14.5931480682, 13.65262763441227]], [0, [1.30088471274, 3.1620979075567597]], [0, [0.116182713597, 0.2045615376503895]]], + 'Mn': [[0, [221.16302319, 34.70340183637225]], [0, [15.8499577468, 14.53121460187906]], [0, [1.45886982985, 3.6870872736982983]], [0, [0.132849408341, 0.23929909691538395]]], + 
'Fe': [[0, [236.275958236, 37.13636549602007]], [0, [17.0021924689, 15.423416039160818]], [0, [1.58039830539, 4.245713892974994]], [0, [0.138154959446, 0.24497443207694142]]], + 'Co': [[0, [251.266826314, 39.654913061172365]], [0, [18.1259810867, 16.297312916228524]], [0, [1.72000963703, 4.847287712375843]], [0, [0.146811845647, 0.2591203258656231]]], + 'Ni': [[0, [265.85589001, 42.264044242431645]], [0, [19.1913776924, 17.14653742887106]], [0, [1.87127991906, 5.488214209582658]], [0, [0.15768951119, 0.278567741559687]]], + 'Cu': [[0, [279.89558372, 44.96377471419826]], [0, [20.1795701285, 17.969392784841585]], [0, [2.0307546413, 6.165773142025554]], [0, [0.170101920073, 0.30155942534387087]]], + 'Zn': [[0, [293.316698633, 47.75086136824285]], [0, [21.0782849144, 18.76770516482975]], [0, [2.19628414783, 6.877297576296975]], [0, [0.183611811011, 0.32702068243576027]]], + 'Ga': [[0, [223.351407737, 51.85980692866955]], [0, [13.8991890472, 16.536719699834574]], [0, [1.79766451577, 5.726042077556815]], [0, [0.101436081792, 0.1668401485879247]]], + 'Ge': [[0, [246.769907155, 54.68127699607652]], [0, [15.4889775395, 17.96144762660623]], [0, [2.02770427595, 6.416697553920722]], [0, [0.121093071072, 0.22751860302804602]]], + 'As': [[0, [270.61373978, 57.564175496815935]], [0, [17.0703038187, 19.413617521179415]], [0, [2.25845376191, 7.089265265044681]], [0, [0.143577215473, 0.3015460843175103]]], + 'Se': [[0, [300.848605175, 60.466799135862026]], [0, [19.2537085333, 21.03082294851702]], [0, [2.56276392363, 7.936692573640534]], [0, [0.179389676325, 0.42001811250289295]]], + 'Br': [[0, [328.797262949, 63.43810791545978]], [0, [21.1924514002, 22.57846913638884]], [0, [2.85204353045, 8.701657933885878]], [0, [0.215086275684, 0.5517188564364737]]], + 'Kr': [[0, [355.214174754, 66.5014860746436]], [0, [22.9403228854, 24.090357342766612]], [0, [3.13032037939, 9.401485056551708]], [0, [0.251740278777, 0.6984900695917353]]], + 'Rb': [[0, [514.90443499, 66.45387731817529]], [0, 
[39.0691107041, 29.02209698864997]], [0, [4.80426316714, 13.141490959616895]], [0, [0.563096338616, 1.4122294089225085]], [0, [0.0437868695146, 0.06021000634919698]]], + 'Sr': [[0, [543.738995237, 69.61302407200955]], [0, [41.128661972, 30.45525713468061]], [0, [5.13125190812, 14.000583603948632]], [0, [0.584163639475, 1.502072088846565]], [0, [0.0457473904015, 0.07193642805575438]]], + 'Y': [[0, [573.865438818, 72.8256986346715]], [0, [43.3418373882, 31.888048066550517]], [0, [5.52448276023, 14.85533123439925]], [0, [0.639200518782, 1.6690156488501313]], [0, [0.0565895915557, 0.0999066215124533]]], + 'Zr': [[0, [618.872151787, 75.68769297760856]], [0, [47.1806876039, 33.6719208912133]], [0, [6.08833013219, 15.986254333511788]], [0, [0.716501829036, 1.9375575902411635]], [0, [0.0713273733558, 0.13612560832942924]]], + 'Nb': [[0, [659.649771305, 78.78777525481571]], [0, [50.4574540637, 35.35272825043895]], [0, [6.61550206936, 17.019827159386775]], [0, [0.782993768027, 2.209716527541788]], [0, [0.0844791372764, 0.16887885100075942]]], + 'Mo': [[0, [699.220087037, 82.03480488866288]], [0, [53.5109266255, 37.0105613980275]], [0, [7.13722099954, 18.0118404058805]], [0, [0.84688911897, 2.5015703606076256]], [0, [0.0970485492451, 0.20007447901252992]]], + 'Tc': [[0, [738.224099414, 85.40820605841263]], [0, [56.4145863596, 38.665323565768624]], [0, [7.65973260957, 18.97333333242976]], [0, [0.910214479934, 2.8171999934246834]], [0, [0.109178173374, 0.22976890861393806]]], + 'Ru': [[0, [778.546434184, 88.81655493493976]], [0, [59.5272531305, 40.349976300677305]], [0, [8.19597691306, 20.019036121376814]], [0, [0.94324592826, 3.1566525285906963]], [0, [0.110016658835, 0.22366236275456197]]], + 'Rh': [[0, [823.569257154, 92.20831582218796]], [0, [63.1039857198, 42.120036402905754]], [0, [8.80656745978, 21.13062914191657]], [0, [0.997443318264, 3.555621798417296]], [0, [0.114146789352, 0.22823056867238134]]], + 'Pd': [[0, [870.785579213, 95.65527378041557]], [0, [66.8564686032, 
43.92776585899257]], [0, [9.45937938438, 22.26339050610234]], [0, [1.06293161212, 3.9977377213753678]], [0, [0.120329936612, 0.2388496753106151]]], + 'Ag': [[0, [919.269642253, 99.18450014865658]], [0, [70.6762222363, 45.7571802751057]], [0, [10.1425268095, 23.40046850321277]], [0, [1.13573760258, 4.47761428381862]], [0, [0.12791270815, 0.25334823717250327]]], + 'Cd': [[0, [968.604184045, 102.80825873069548]], [0, [74.5132242078, 47.60242594900452]], [0, [10.8507700928, 24.534003350659496]], [0, [1.21398140348, 4.993379192403898]], [0, [0.136511946916, 0.27055997696052425]]], + 'In': [[0, [843.405124708, 110.96520306427927]], [0, [57.7349383985, 47.01539915999735]], [0, [9.2861062863, 21.19104807232844]], [0, [1.10262305907, 4.734376047579965]], [0, [0.0871639505153, 0.16198129973968295]]], + 'Sn': [[0, [884.286663849, 114.96679724832346]], [0, [60.3792607664, 48.9682687896402]], [0, [9.80580254299, 22.0195379814388]], [0, [1.1950856195, 5.126927789028787]], [0, [0.0980413909659, 0.2050113690611363]]], + 'Sb': [[0, [928.446756666, 119.0462561998037]], [0, [63.2301122301, 51.01110432146532]], [0, [10.334859026, 22.896873446536297]], [0, [1.2864338161, 5.510960266352291]], [0, [0.110597164197, 0.2557490251201422]]], + 'Te': [[0, [1006.91297059, 122.5086188723354]], [0, [69.9563984769, 53.37878173194244]], [0, [11.3710909629, 24.795357618473698]], [0, [1.42210784675, 6.046549960417364]], [0, [0.134157895373, 0.3465874814433826]]], + 'I': [[0, [1071.96922308, 126.39246353979226]], [0, [75.0210569929, 55.63865377654407]], [0, [12.2012216823, 26.216841337450926]], [0, [1.5460507813, 6.517948997414104]], [0, [0.156539597363, 0.44315588522145566]]], + 'Xe': [[0, [1130.33961713, 130.57341813447323]], [0, [79.1625523503, 57.86111406775242]], [0, [12.9165161164, 27.39407258678573]], [0, [1.66262421031, 6.948688659111207]], [0, [0.178743239447, 0.5471697476001655]]], + 'Cs': [[0, [1428.6078292, 128.82899897630176]], [0, [112.812659333, 62.53067152407603]], [0, [17.1798768915, 
34.227961935433065]], [0, [2.21568367858, 8.479278930585247]], [0, [0.337292691131, 1.0003736377349504]], [0, [0.0255963992844, 0.03279972807297557]]], + 'Ba': [[0, [1523.81269929, 132.33374253222377]], [0, [121.683219116, 64.98719011675838]], [0, [18.5377225373, 36.123991347012996]], [0, [2.42239198451, 9.151849111983358]], [0, [0.37464487939, 1.1313217747067679]], [0, [0.0318322692155, 0.05088369770736389]]], + 'La': [[0, [1601.41213596, 136.45908437706606]], [0, [128.13744821, 67.27340937055364]], [0, [19.6176760354, 37.63312362169784]], [0, [2.60445015787, 9.70137042264615]], [0, [0.410586721293, 1.2723058645673604]], [0, [0.0407833256045, 0.07329654046194856]]], + 'Ce': [[0, [1658.47048562, 141.23225188668434]], [0, [132.175071965, 69.19731594670756]], [0, [20.5059445332, 38.7164904908682]], [0, [2.74661595209, 10.37163705261105]], [0, [0.44440565431, 1.4015607510299235]], [0, [0.0447234791579, 0.08190699460978135]]], + 'Pr': [[0, [1715.26288637, 146.15656574406643]], [0, [136.051409723, 71.1296245077095]], [0, [21.3888165255, 39.75113375394901]], [0, [2.89289683236, 11.083749067340108]], [0, [0.479036909221, 1.535278321067373]], [0, [0.0484113375519, 0.09010431008530405]]], + 'Nd': [[0, [1771.7679871, 151.23596000355406]], [0, [139.763308576, 73.0683098482763]], [0, [22.2678147838, 40.737595040425745]], [0, [3.04417565147, 11.835768086134143]], [0, [0.514725872587, 1.674859379548526]], [0, [0.051944834864, 0.09809792639764876]]], + 'Pm': [[0, [1827.66270931, 156.48142428705057]], [0, [143.259770003, 75.01290712875812]], [0, [23.1358417402, 41.66771534375595]], [0, [3.199640873, 12.62445386651362]], [0, [0.55121096113, 1.8197816596901466]], [0, [0.0553253104444, 0.10584644293996817]]], + 'Sm': [[0, [1882.00387559, 161.91851435984526]], [0, [146.409075469, 76.95898454189155]], [0, [23.9752478278, 42.51900002843879]], [0, [3.35734325422, 13.444573455091518]], [0, [0.58792965549, 1.9681143031964388]], [0, [0.0585081846119, 0.1131824248041495]]], + 'Eu': [[0, 
[1932.75854719, 167.6002366798858]], [0, [148.934876865, 78.89578945074894]], [0, [24.7493435866, 43.242193431261235]], [0, [3.51366553918, 14.286616540326525]], [0, [0.623904660384, 2.115867151197456]], [0, [0.0614016764838, 0.1198007548367038]]], + 'Gd': [[0, [1975.59820781, 173.62835263535527]], [0, [150.267478001, 80.80228887933148]], [0, [25.3812097224, 43.730413811304885]], [0, [3.66178046197, 15.131436422018986]], [0, [0.657389398602, 2.2552290399910286]], [0, [0.063837005797, 0.1251673713840444]]], + 'Tb': [[0, [2043.29310958, 179.2037445859498]], [0, [154.366907789, 83.0695482866181]], [0, [26.2243318766, 44.71490580298028]], [0, [3.7689949755, 16.120004675463715]], [0, [0.663323872601, 2.2895066251146168]], [0, [0.0627463378236, 0.12027956081283321]]], + 'Dy': [[0, [2104.40136331, 185.1021613728763]], [0, [157.480965804, 85.29961820416767]], [0, [26.9609319436, 45.495087853775985]], [0, [3.88372694635, 17.1127505005226]], [0, [0.672078437777, 2.338126517300418]], [0, [0.0616039775116, 0.1156643841617586]]], + 'Ho': [[0, [2154.45469909, 191.42238595600696]], [0, [159.026925085, 87.48357952837262]], [0, [27.5027486946, 45.95633835932799]], [0, [3.99631996625, 18.08974262324885]], [0, [0.680954465184, 2.3885052181436106]], [0, [0.060173741041, 0.1106488289229826]]], + 'Er': [[0, [2190.94335259, 198.20792842426218]], [0, [158.679181523, 89.64609353716017]], [0, [27.7793550439, 46.00785816969023]], [0, [4.10121508569, 19.03368366564328]], [0, [0.688486025083, 2.4335494796515578]], [0, [0.0583244945524, 0.10492614812539582]]], + 'Tm': [[0, [2214.48978734, 205.42335133632005]], [0, [156.520247439, 91.85354642909319]], [0, [27.7527332692, 45.61394284204278]], [0, [4.19629401273, 19.931108844198004]], [0, [0.69439592841, 2.4709679915183504]], [0, [0.0560809223751, 0.09861920356341443]]], + 'Yb': [[0, [2229.17694813, 212.95081901943223]], [0, [153.095425598, 94.19509432925543]], [0, [27.4346946752, 44.83036127743994]], [0, [4.28220834088, 20.774850510083816]], [0, 
[0.699283130104, 2.502096031620723]], [0, [0.0536017037618, 0.09215131893680198]]], + 'Lu': [[0, [2240.76323234, 220.64120470655195]], [0, [149.186347076, 96.73527206213477]], [0, [26.882917845, 43.799061576056026]], [0, [4.36078494051, 21.56356820520379]], [0, [0.703938851713, 2.529434335353653]], [0, [0.0510727969004, 0.08594615780727508]]], + 'Hf': [[0, [2597.75786792, 221.4984988672269]], [0, [188.512068741, 99.90153962759624]], [0, [33.1238337682, 51.910671812884125]], [0, [4.95610698524, 24.075881834623832]], [0, [0.840126255104, 3.0993574039786624]], [0, [0.0754290091254, 0.14830089683149864]]], + 'Ta': [[0, [2835.12187643, 224.97090244390182]], [0, [212.567396461, 103.35145847528267]], [0, [36.6312660085, 56.10051046630536]], [0, [5.37947393132, 25.81036889369674]], [0, [0.929151975174, 3.4952140173242676]], [0, [0.0934277241824, 0.20126674008286047]]], + 'W': [[0, [3025.30468794, 229.72846917871507]], [0, [230.276064677, 106.62250063664358]], [0, [39.2800873493, 59.00619946134962]], [0, [5.75689567535, 27.33479438233774]], [0, [1.00224682037, 3.845156936516533]], [0, [0.109770850673, 0.2527564765722409]]], + 'Re': [[0, [3195.61889568, 235.18655723502508]], [0, [245.095364506, 109.79052780485154]], [0, [41.5829628586, 61.378152855515665]], [0, [6.12180336884, 28.778519952039215]], [0, [1.06873134848, 4.1880086644629335]], [0, [0.12566089767, 0.3051098407321521]]], + 'Os': [[0, [3269.8476374, 242.79909040960803]], [0, [247.725409062, 112.31392244158873]], [0, [42.3817611068, 62.06265168735337]], [0, [6.3443841081, 29.935902842018656]], [0, [1.06629631325, 4.327346089905066]], [0, [0.129797938525, 0.311692170282036]]], + 'Ir': [[0, [3376.87667402, 250.01523976524726]], [0, [254.046404976, 115.1096116274901]], [0, [43.6319269616, 63.23965630985516]], [0, [6.61971894051, 31.182376053342995]], [0, [1.08166412391, 4.55923508274026]], [0, [0.135494138077, 0.32374501274030015]]], + 'Pt': [[0, [3503.10050982, 257.11908266124726]], [0, [262.371870877, 
118.06427561966308]], [0, [45.1400059471, 64.68067709627911]], [0, [6.9289515116, 32.48110151528869]], [0, [1.1081209327, 4.853290082621408]], [0, [0.142293514583, 0.3395090236627465]]], + 'Au': [[0, [3642.37385874, 264.24694689878606]], [0, [271.922734082, 121.13071183685847]], [0, [46.8125693251, 66.2730681995686]], [0, [7.26324418491, 33.8141784669834]], [0, [1.14228427509, 5.19507983319498]], [0, [0.149995980588, 0.358211807740868]]], + 'Hg': [[0, [3791.49387942, 271.4719140255812]], [0, [282.287772429, 124.2856953523547]], [0, [48.5988727445, 67.95563981054985]], [0, [7.61787984362, 35.17197803197935]], [0, [1.18217257465, 5.576807483854668]], [0, [0.158470192072, 0.37936980854301705]]], + 'Tl': [[0, [2936.57544261, 296.6310154714052]], [0, [169.521034982, 128.06139769860988]], [0, [27.7278154244, 47.591482085503884]], [0, [6.18159381819, 28.12073706100974]], [0, [0.937899869214, 4.530828450871838]], [0, [0.0878953169292, 0.17357741811050326]]], + 'Pb': [[0, [3009.77971351, 305.25255358884556]], [0, [171.15240957, 131.55698757943694]], [0, [27.3362555847, 48.331254479828324]], [0, [6.28851042826, 28.17024630706816]], [0, [0.974528662433, 4.702910510256614]], [0, [0.0938717769309, 0.2034350353550231]]], + 'Bi': [[0, [3115.8496148, 313.8093294029781]], [0, [175.822879115, 134.97757970713243]], [0, [27.7224718639, 49.668864571379345]], [0, [6.46561267924, 28.69206651328442]], [0, [1.01344607802, 4.8981241805735936]], [0, [0.101357420858, 0.238854699432195]]], + 'Po': [[0, [3389.7826553, 320.66212678156774]], [0, [197.70471693, 137.6782930250117]], [0, [32.7684432058, 53.54264845480374]], [0, [7.20421362633, 32.282004781869944]], [0, [1.12137629162, 5.45713798816872]], [0, [0.122511534693, 0.3260853416866197]]], + 'At': [[0, [3615.64824975, 328.23201750107296]], [0, [214.627481704, 140.75596703512562]], [0, [36.212401456, 56.77561235809897]], [0, [7.72251960097, 34.59419860541525]], [0, [1.20498118619, 5.86665441892619]], [0, [0.140743435239, 
0.4111765886798949]]], + 'Rn': [[0, [3822.43609476, 336.350129170213]], [0, [229.112932587, 144.07512932334393]], [0, [38.9069689318, 59.56038725013068]], [0, [8.15670864482, 36.44005731475775]], [0, [1.27699233159, 6.209274469973415]], [0, [0.157920555428, 0.49843705889064177]]], + 'Fr': [[0, [5482.49524373, 319.6645485275319]], [0, [423.079084959, 150.95938898566172]], [0, [69.2009027293, 87.27661177977454]], [0, [10.8608508924, 46.439279301313775]], [0, [1.90646535073, 8.470741952248504]], [0, [0.346917622534, 1.1656662095666177]], [0, [0.0307550493062, 0.04426509178665867]]], + 'Ra': [[0, [5824.22217495, 326.4339512907625]], [0, [453.52449169, 155.42009100679874]], [0, [73.3773009492, 90.8040545340973]], [0, [11.464490496, 48.34987083896135]], [0, [2.03967356151, 9.0403262759412]], [0, [0.371641031126, 1.2738484791994147]], [0, [0.0352213028729, 0.0608041346622409]]], + 'Ac': [[0, [6036.5236009, 335.7796360977328]], [0, [467.073213456, 159.12991928763344]], [0, [75.5296659631, 92.69417995252799]], [0, [11.8777377991, 49.82342721622108]], [0, [2.11757248944, 9.395644277773304]], [0, [0.381340313268, 1.3369398526252363]], [0, [0.0383652274726, 0.07276437615219129]]], + 'Th': [[0, [5781.02789712, 352.98106098074595]], [0, [422.978272928, 160.16991432703165]], [0, [70.6991828424, 88.76304121503837]], [0, [11.6767574085, 49.81457199002962]], [0, [1.97848007739, 9.160629598977392]], [0, [0.331860385876, 1.1973801004966318]], [0, [0.026439636441, 0.0440542374768854]]], + 'Pa': [[0, [6470.70044306, 355.6321452303036]], [0, [493.225990334, 166.6023122006732]], [0, [79.745338563, 96.26732853658515]], [0, [12.7459791331, 52.8675409376038]], [0, [2.24462222504, 10.084915235025559]], [0, [0.426863180027, 1.5811923530178016]], [0, [0.046081696294, 0.09023405892146073]]], + 'U': [[0, [6677.40964538, 366.3604410199135]], [0, [503.945262015, 170.3320215371999]], [0, [81.5371062053, 97.81121245233369]], [0, [13.1666520998, 54.33056511073182]], [0, [2.29389178189, 
10.443579781479114]], [0, [0.446656456574, 1.692621958244384]], [0, [0.0486532127186, 0.09517496550595222]]], + 'Np': [[0, [6876.97988756, 377.6727808046505]], [0, [512.988616603, 174.062836818793]], [0, [83.094841748, 99.18659693335287]], [0, [13.577303204, 55.73888881006303]], [0, [2.33602765587, 10.819245703537456]], [0, [0.463908300107, 1.7929280737098292]], [0, [0.0503407579225, 0.09771182678295015]]], + 'Pu': [[0, [7073.46467092, 389.4767458720515]], [0, [520.913132774, 177.82649815105037]], [0, [84.4759601259, 100.44506864340687]], [0, [13.9835865192, 57.10032981294112]], [0, [2.37396977934, 11.219928664816145]], [0, [0.479267778382, 1.8834054685337047]], [0, [0.0513447915011, 0.09846227430507107]]], + 'Am': [[0, [7268.46090414, 401.81608908117465]], [0, [527.78044655, 181.63697263440923]], [0, [85.6760899888, 101.59181640122917]], [0, [14.3850956892, 58.408919501602334]], [0, [2.40863549995, 11.647846175973653]], [0, [0.492763833865, 1.9631974676672352]], [0, [0.0517261402691, 0.0976668611720969]]], + 'Cm': [[0, [7461.07270308, 414.6785096263954]], [0, [533.554758233, 185.49522592946025]], [0, [86.6825832446, 102.6241476347959]], [0, [14.7808918438, 59.65650561763819]], [0, [2.44083150917, 12.104436105449656]], [0, [0.504445261811, 2.0318083591637333]], [0, [0.05153336688, 0.09554389071702431]]], + 'Bk': [[0, [7834.61665484, 425.45484503199054]], [0, [559.731032607, 190.24065973922345]], [0, [90.3490076906, 105.5123797523743]], [0, [15.4132069342, 61.660011790470044]], [0, [2.50033510705, 12.766578648339491]], [0, [0.518065291853, 2.1191379396217878]], [0, [0.0502100687403, 0.09121645646373851]]], + 'Cf': [[0, [8201.72850818, 436.9331462249914]], [0, [584.091204457, 194.97482137107826]], [0, [93.761139844, 108.17900966131799]], [0, [16.0367739256, 63.59285048454238]], [0, [2.55798346418, 13.447343479938588]], [0, [0.530648155402, 2.197810376070656]], [0, [0.0488334639026, 0.08694654953904225]]], + 'Es': [[0, [8568.60789377, 449.0720813904205]], [0, 
[607.252250672, 199.7280083883994]], [0, [96.9993255946, 110.69452133675912]], [0, [16.6586969315, 65.47695911971626]], [0, [2.61542700505, 14.151859950374625]], [0, [0.542629123426, 2.2703523785804616]], [0, [0.0474279857112, 0.0828159494077535]]], + 'Fm': [[0, [8937.95057321, 461.81915524070774]], [0, [629.550428909, 204.51466732620824]], [0, [100.107987123, 113.0978434843216]], [0, [17.2830930058, 67.32473882802411]], [0, [2.67360927633, 14.882691359072656]], [0, [0.554298517748, 2.3385277257658816]], [0, [0.0460161264899, 0.07887980370375362]]], + 'Md': [[0, [9312.72294426, 475.2053390594495]], [0, [651.169079559, 209.34968317990007]], [0, [103.107616035, 115.4101662912288]], [0, [17.9118103054, 69.14191004315032]], [0, [2.73292190404, 15.640711247342152]], [0, [0.565798602387, 2.4032865063709417]], [0, [0.0446097028095, 0.07515373722237267]]], + 'No': [[0, [9692.47931286, 489.2442112346279]], [0, [672.034303671, 214.23197488829172]], [0, [105.988550516, 117.63038685999115]], [0, [18.5442146559, 70.92646079341314]], [0, [2.7932429022, 16.425417198806798]], [0, [0.577107028367, 2.4645969790388342]], [0, [0.0432042120982, 0.07161106318788321]]], + } + + self.hf_basis = {'H': [], + 'He': [[0, [1.8865345899608519, 0.4056146926108746]]], + 'Li': [[0, [1.9854870701524918, 0.842937532901041]]], + 'Be': [[0, [4.744586184977778, 1.3574437702689057]], [0, [0.2792470137084066, 0.12818229520909]]], + 'B': [[0, [6.0338581393756145, 2.094637525409216]], [0, [0.2296652845463048, 0.1538820056563987]]], + 'C': [[0, [8.36842382629919, 2.912335066987576]], [0, [0.3175823851018592, 0.2825906903498745]]], + 'N': [[0, [10.93399949627562, 3.848864491590766]], [0, [0.43457823405570917, 0.4666119106370673]]], + 'O': [[0, [13.822779569568999, 4.823227937581987]], [0, [0.6163807631542392, 0.7589805047258943]]], + 'F': [[0, [16.696221288447184, 5.913626376015676]], [0, [0.8069674335184295, 1.1067332169360984]]], + 'Ne': [[0, [19.44766524633368, 7.113317051280908]], [0, 
[1.0081157441421305, 1.508827408605945]]], + 'Na': [[0, [22.043514485429395, 8.505970515543133]], [0, [1.0688208368282481, 1.7698621680543754]]], + 'Mg': [[0, [35.68089579776235, 9.370720219473146]], [0, [2.9023990296953044, 2.762905547578002]], [0, [0.391911845854857, 0.4395524604043637]]], + 'Al': [[0, [34.328377368288, 11.458430900825965]], [0, [1.895391976451897, 2.8294008855907764]], [0, [0.12243916188522636, 0.09609543557391674]]], + 'Si': [[0, [40.1763529442365, 13.078588758018803]], [0, [2.239495255980109, 3.340046848460941]], [0, [0.13204220229571037, 0.13836780632028686]]], + 'P': [[0, [46.66493733746877, 14.784964733718208]], [0, [2.6279568276824814, 3.8907484994126014]], [0, [0.1594036030260791, 0.2054997181258127]]], + 'S': [[0, [54.215297785332154, 16.531442577888246]], [0, [3.167647315145373, 4.489750969396064]], [0, [0.22671769463490918, 0.3487263502408896]]], + 'Cl': [[0, [62.03053259370884, 18.377558847158248]], [0, [3.700397336007754, 5.10183267645642]], [0, [0.28974576291563425, 0.5086598918385119]]], + 'Ar': [[0, [70.09781762916084, 20.327210911919742]], [0, [4.219331463603571, 5.7261201541458835]], [0, [0.35198503878294074, 0.6852541804555327]]], + 'K': [[0, [81.43333911489003, 22.298648478534886]], [0, [4.962141983102695, 6.635182027193869]], [0, [0.3731894290462584, 0.8199734170834927]]], + 'Ca': [[0, [74.41569962693416, 24.64109121546901]], [0, [4.071511193479644, 6.221216302620745]], [0, [0.28671657971452114, 0.6443784267175638]]], + 'Sc': [[0, [101.20791061796358, 26.412487878859128]], [0, [6.446723728431389, 8.03204205372517]], [0, [0.7834045065030468, 1.0165590397404942]], [0, [0.22063826662337555, 0.32378518633489617]]], + 'Ti': [[0, [114.39209603243822, 28.397321564939656]], [0, [7.751031064083496, 8.705787116537461]], [0, [1.3409079006639175, 1.4131635450481188]], [0, [0.29105515283701394, 0.5831178684215473]]], + 'V': [[0, [128.16680647954118, 30.409558254731866]], [0, [9.216695666616227, 9.327297806386307]], [0, [1.858353853141026, 
2.0326669623504348]], [0, [0.3403321818150978, 0.7596484454818172]]], + 'Cr': [[0, [142.77227533657648, 32.44721271300131]], [0, [10.841336380933656, 9.962832486421375]], [0, [2.324791884735429, 2.760501995261298]], [0, [0.386053542334779, 0.9188917102547013]]], + 'Mn': [[0, [158.3329221300052, 34.5079863370424]], [0, [12.618932464144514, 10.650940881473641]], [0, [2.751317714668748, 3.5518867608144586]], [0, [0.4313986524269153, 1.0757402574568604]]], + 'Fe': [[0, [171.59971217722958, 36.95724348896622]], [0, [12.84888931599388, 12.016251508544268]], [0, [2.4208579434027238, 3.549284884553206]], [0, [0.44179048296062723, 1.0407595913547492]]], + 'Co': [[0, [186.74822291999737, 39.328592943410854]], [0, [13.74313677181651, 13.158002775580817]], [0, [2.3981274633981866, 3.9707170856647522]], [0, [0.4585307413973846, 1.0414789524987433]]], + 'Ni': [[0, [203.1786382537903, 41.69876688913565]], [0, [14.904507246734576, 14.254460551474274]], [0, [2.50518240559692, 4.554224625007843]], [0, [0.4821723138309187, 1.0776113454467862]]], + 'Cu': [[0, [220.69456355904438, 44.09047093492658]], [0, [16.228087185242245, 15.352003273713255]], [0, [2.6799011471909333, 5.235665891996169]], [0, [0.5116345189692049, 1.1415411696644089]]], + 'Zn': [[0, [239.22585729085247, 46.51256599644878]], [0, [17.67628445660993, 16.466826311793856]], [0, [2.8967970112707353, 5.996032281556834]], [0, [0.5457841216300788, 1.2269377067442289]]], + 'Ga': [[0, [207.77682210003923, 50.56169123112327]], [0, [11.856343239908128, 17.3168271457457]], [0, [1.435457236821561, 4.342639055962598]], [0, [0.17389779129425226, 0.1653080552415563]]], + 'Ge': [[0, [225.24685392781288, 53.26905157611517]], [0, [12.852764572486532, 18.78877288270596]], [0, [1.5215113800917213, 4.764856999241978]], [0, [0.1323014777214065, 0.14240658459873975]]], + 'As': [[0, [247.9086110252744, 56.028840702070525]], [0, [14.300295034709391, 20.402255662442446]], [0, [1.6861444954415892, 5.341032330752718]], [0, [0.13416499137137874, 
0.173484907424891]]], + 'Se': [[0, [273.8649943966164, 58.799515164217304]], [0, [16.08110012602841, 22.06344368325976]], [0, [1.932031883742216, 6.021612670882894]], [0, [0.1792331056182616, 0.28093949286892783]]], + 'Br': [[0, [300.01299028853975, 61.61553868303424]], [0, [17.856104656403406, 23.74892022762549]], [0, [2.1685919518796943, 6.686153971304707]], [0, [0.21670708306814743, 0.39290870861189514]]], + 'Kr': [[0, [326.6902386555732, 64.48778748553242]], [0, [19.647882619248065, 25.46975468620672]], [0, [2.3989880465652473, 7.3449776683736046]], [0, [0.2510302844453139, 0.5108510489663171]]], + 'Rb': [[0, [371.63737826238116, 67.11280281784138]], [0, [23.248544533094694, 27.589207985709436]], [0, [2.8726598551454177, 8.65313863037671]], [0, [0.2777391154691928, 0.6698990577646332]]], + 'Sr': [[0, [370.72341161894167, 70.53669290120207]], [0, [22.369820917706722, 28.689970312925873]], [0, [2.74311782634614, 8.459944450402055]], [0, [0.2267095844986972, 0.568426058335472]]], + 'Y': [[0, [381.9433918083273, 73.79627223351419]], [0, [22.657005971801517, 30.132831274164765]], [0, [2.78181561035276, 8.545479781705756]], [0, [0.23142082707081157, 0.6180042289006495]]], + 'Zr': [[0, [404.12975489707895, 77.02187364839594]], [0, [23.913120490512053, 31.876617008220126]], [0, [2.928122394174498, 8.966554439771224]], [0, [0.2509660177582832, 0.715944254052498]]], + 'Nb': [[0, [432.000753226581, 80.2736752206378]], [0, [25.649609677505364, 33.771572054045144]], [0, [3.127263691497556, 9.56516637789088]], [0, [0.27614878489011474, 0.8386582279030884]]], + 'Mo': [[0, [462.68918558642525, 83.56383349210992]], [0, [27.620091978879955, 35.74744481883726]], [0, [3.350241541886852, 10.246548405106552]], [0, [0.3046372235349947, 0.9794709766343448]]], + 'Tc': [[0, [495.0483050672117, 86.89968164312515]], [0, [29.724523813339573, 37.779386272068194]], [0, [3.584071928719099, 10.970538625909704]], [0, [0.3354202932589703, 1.135308320005783]]], + 'Ru': [[0, [507.88917516825205, 
90.49349034803686]], [0, [30.119638007448955, 39.47115792065111]], [0, [3.5989334168125757, 11.01711542201227]], [0, [0.36368976141299975, 1.2658304433949406]]], + 'Rh': [[0, [524.9638375058429, 94.07757110910124]], [0, [30.919224799048706, 41.29611262024772]], [0, [3.6440391113842647, 11.211906105364292]], [0, [0.39297773558866045, 1.4040392885288977]]], + 'Pd': [[0, [545.3714847389803, 97.68710808884666]], [0, [32.017351723397205, 43.22084107574552]], [0, [3.7145429669883927, 11.526806131814698]], [0, [0.4233961197550389, 1.5506684631945555]]], + 'Ag': [[0, [568.4833402808085, 101.34010161842532]], [0, [33.34592493342003, 45.22617974954437]], [0, [3.805924686600645, 11.93953395818886]], [0, [0.45499468116272807, 1.7061526933158015]]], + 'Cd': [[0, [593.885889821288, 105.04578226656376]], [0, [34.86259758880326, 47.30114504155227]], [0, [3.914677789101674, 12.43424303591464]], [0, [0.48779609479059793, 1.870799445536182]]], + 'In': [[0, [745.0453334167443, 107.35066272796716]], [0, [47.58971626349325, 50.926236984161385]], [0, [5.971203828280484, 16.665850453251153]], [0, [0.7430042209193889, 3.01900686459137]], [0, [0.08466099556037841, 0.0621837900529884]]], + 'Sn': [[0, [797.0285265417332, 110.81033718024638]], [0, [51.53384351159246, 53.28995874125511]], [0, [6.509259671570793, 17.920299624455424]], [0, [0.8054405533286846, 3.3669375931477203]], [0, [0.0663261465359773, 0.06275078847805306]]], + 'Sb': [[0, [854.332635733605, 114.22836471354059]], [0, [56.03010644110775, 55.71293325193932]], [0, [7.143402312658964, 19.30403594352593]], [0, [0.8834712631942941, 3.765970580984084]], [0, [0.06733579855319452, 0.07997832797135018]]], + 'Te': [[0, [912.166800137674, 117.7227728097036]], [0, [60.59380968267101, 58.126521408848]], [0, [7.8128697037188815, 20.648839233084743]], [0, [0.9864784933135154, 4.17777092264925]], [0, [0.1013950805635164, 0.14842059360104934]]], + 'I': [[0, [971.2326574046591, 121.29884984899455]], [0, [65.25254319977338, 60.56284562091366]], 
[0, [8.49547368191436, 22.00152224539976]], [0, [1.0874117592443089, 4.595440718085864]], [0, [0.1269930211411724, 0.2193048890155139]]], + 'Xe': [[0, [1031.8412270302938, 124.95241815534052]], [0, [70.03990230763266, 63.02489883673915]], [0, [9.196997644894727, 23.372474126117584]], [0, [1.1869856131289065, 5.023453919720388]], [0, [0.14857777514764356, 0.29253760225088343]]], + 'Cs': [[0, [1149.3826345739149, 127.00628909302009]], [0, [82.07400302361911, 65.73151848347841]], [0, [11.211986732106572, 26.3413877290317]], [0, [1.527946233339621, 6.140575117802016]], [0, [0.20092255135604542, 0.5358903213328864]]], + 'Ba': [[0, [1169.694775701659, 132.11174088077792]], [0, [81.77157679719838, 67.95207434814108]], [0, [11.07493150328844, 26.47785773461083]], [0, [1.5039225873956188, 6.206376352030552]], [0, [0.17008617153559455, 0.46825530317342057]]], + 'La': [[0, [1222.2841796269504, 136.44950093257452]], [0, [85.30738538527056, 70.43142971440702]], [0, [11.534893922454101, 27.525891530415866]], [0, [1.565216495786674, 6.497397763993776]], [0, [0.17220089046882703, 0.5081058416183188]]], + 'Ce': [[0, [1272.0807279952528, 141.03286793355306]], [0, [88.36657550864288, 72.8898142136249]], [0, [11.818291666733137, 28.480233667510007]], [0, [1.6071799883505355, 6.807847988718011]], [0, [0.18079491474660023, 0.5362729882922856]]], + 'Pr': [[0, [1322.1469857952939, 145.7514595496068]], [0, [91.39656432137411, 75.33333265405476]], [0, [12.1071170797223, 29.4276517750929]], [0, [1.6537813057728807, 7.163429031604369]], [0, [0.18837281505285847, 0.5591574497687]]], + 'Nd': [[0, [1372.5263581315082, 150.60889920740257]], [0, [94.38256339798501, 77.77663661280765]], [0, [12.388920046052041, 30.374947266803076]], [0, [1.7024088534425037, 7.543136623138128]], [0, [0.1952797675688985, 0.578520067184905]]], + 'Pm': [[0, [1423.2939068289443, 155.6052462723981]], [0, [97.32865783461368, 80.22405611653822]], [0, [12.661617245227912, 31.327874011441633]], [0, [1.7523980768357152, 
7.939936976159989]], [0, [0.20168624380514635, 0.5952857313410442]]], + 'Sm': [[0, [1474.5487132299602, 160.73950011930427]], [0, [100.24430189239065, 82.67778973404094]], [0, [12.92552209508792, 32.292045829866765]], [0, [1.803472701882244, 8.350338175674331]], [0, [0.20769358126601012, 0.6100257919951613]]], + 'Eu': [[0, [1526.4150536618301, 166.01206305202103]], [0, [103.1400484979067, 85.1394038516512]], [0, [13.18165318229166, 33.27271244759562]], [0, [1.8554646230381475, 8.772175676339293]], [0, [0.2133674470507607, 0.6231207671624414]]], + 'Gd': [[0, [1578.9784735126057, 171.42174791339716]], [0, [106.0250115856627, 87.60976291496155]], [0, [13.431158961640053, 34.274486275744934]], [0, [1.908240993848972, 9.203901002823144]], [0, [0.218751211476024, 0.6348308667972493]]], + 'Tb': [[0, [1627.9257815186743, 177.01885393927807]], [0, [108.58046918725347, 89.98169013002152]], [0, [13.712227614246242, 35.08177288335976]], [0, [1.9934065152409421, 9.778608254703611]], [0, [0.22607538133284424, 0.6586164748301768]]], + 'Dy': [[0, [1675.7366368653, 182.79539454026207]], [0, [110.92891605390538, 92.35169430402841]], [0, [13.944492993755349, 35.88756038236498]], [0, [2.071361368544463, 10.324371945021113]], [0, [0.23293090457243362, 0.6793043698109138]]], + 'Ho': [[0, [1722.45102323609, 188.74643520738246]], [0, [113.09508331285164, 94.71282327683552]], [0, [14.135448633292121, 36.700404016777306]], [0, [2.143446883392562, 10.84407478396342]], [0, [0.23939934892582107, 0.6974618085298848]]], + 'Er': [[0, [1768.1273694341087, 194.86460193618592]], [0, [115.1045170512384, 97.059678831912]], [0, [14.292436046614585, 37.52919176101882]], [0, [2.210644796273452, 11.34014341321473]], [0, [0.24552713219420616, 0.7134490516878607]]], + 'Tm': [[0, [1812.8387097425375, 201.1430178623664]], [0, [116.98049742273002, 99.38821488326626]], [0, [14.422182620212654, 38.38205159874525]], [0, [2.273693630826137, 11.81459993451274]], [0, [0.25134177827762766, 0.7275092899716751]]], + 
'Yb': [[0, [1856.6685254316594, 207.57575706111513]], [0, [118.7434510713386, 101.69567162657735]], [0, [14.53068606589803, 39.26596911718243]], [0, [2.333173888140194, 12.269202867396743]], [0, [0.25686096532697333, 0.7398210931489899]]], + 'Lu': [[0, [1899.7375959050908, 214.15900283658507]], [0, [120.41203401518791, 103.9808323573287]], [0, [14.623290920474743, 40.186822923208226]], [0, [2.389573140054433, 12.70562314337766]], [0, [0.2620987513832905, 0.7505337675010088]]], + 'Hf': [[0, [1967.3161513254126, 220.4818242350529]], [0, [124.34022381142523, 106.59712230324918]], [0, [15.07308379422105, 41.593870479307554]], [0, [2.480818570417923, 13.232329324645132]], [0, [0.2661713168622087, 0.7967557336599056]]], + 'Ta': [[0, [2070.081376528822, 226.40779086022104]], [0, [131.49175287923276, 109.64042202051841]], [0, [15.99100132534389, 43.55837828988383]], [0, [2.6318773077354023, 14.102161904766191]], [0, [0.28043101980165813, 0.889950291909441]]], + 'W': [[0, [2189.549358149728, 232.19925625507935]], [0, [140.25127395857805, 112.8716890901911]], [0, [17.13344756974962, 45.783279372087385]], [0, [2.8098978707573803, 15.126150439697248]], [0, [0.29967949093936525, 1.0098967362808797]]], + 'Re': [[0, [2318.4462036806926, 237.9628858834694]], [0, [149.9937976368708, 116.1966038301959]], [0, [18.404731677310796, 48.15430966963478]], [0, [3.001608061476728, 16.225684227300484]], [0, [0.32187252568656627, 1.1489884194097733]]], + 'Os': [[0, [2388.049070715689, 245.10047388036585]], [0, [153.7734923225801, 118.7983419940856]], [0, [18.857909721345862, 49.56025601814822]], [0, [3.11008633126925, 16.704279730923567]], [0, [0.34445180147113785, 1.2819732299675402]]], + 'Ir': [[0, [2467.4297047382197, 252.25866435592235]], [0, [158.46189701303072, 121.50759831320175]], [0, [19.439726509029725, 51.14252734248185]], [0, [3.2301679883724033, 17.26079206285206]], [0, [0.36862565727337615, 1.4293825704029288]]], + 'Pt': [[0, [2554.3592830018633, 259.50162267370445]], [0, 
[163.79362965973422, 124.30357066368583]], [0, [20.109681026841002, 52.84983429008009]], [0, [3.3577853513983555, 17.869227647362816]], [0, [0.3940749704373637, 1.58963560402845]]], + 'Au': [[0, [2647.942367886888, 266.86258142778973]], [0, [169.65288496596833, 127.17818206724604]], [0, [20.849001035167127, 54.65811794336809]], [0, [3.490999195504156, 18.517519625282052]], [0, [0.42063212480622336, 1.761992991117359]]], + 'Hg': [[0, [2747.928134675977, 274.3568830800225]], [0, [175.9987808231551, 130.12984199024015]], [0, [21.64967483069611, 56.556629698785166]], [0, [3.6289678032455828, 19.200696161193015]], [0, [0.44820670782959365, 1.946165274513181]]], + 'Tl': [[0, [3164.4430017902673, 275.33425125783316]], [0, [215.72530656955274, 136.5134853959493]], [0, [27.161342191636468, 62.3870818915601]], [0, [4.491427780128169, 23.28557562205648]], [0, [0.600440165078794, 2.698540051334935]], [0, [0.06853093205018684, 0.04464665376841102]]], + 'Pb': [[0, [3332.646311029052, 281.94999231460247]], [0, [229.0031090563242, 140.1437908618318]], [0, [28.975969438335593, 64.84409912856167]], [0, [4.810558525708687, 24.740295168731812]], [0, [0.6428089815920308, 2.996264069031993]], [0, [0.047924330717300866, 0.041440305792256264]]], + 'Bi': [[0, [3512.216508855337, 288.64078359604133]], [0, [243.39214593688772, 143.84037317308596]], [0, [30.97853246581632, 67.34690546839133]], [0, [5.172423589452024, 26.30220507176237]], [0, [0.6972914789511377, 3.343004331160787]], [0, [0.046483440633541435, 0.051734625570903164]]], + 'Po': [[0, [3687.56394734581, 295.7938412376767]], [0, [256.99663742660033, 147.48098812190506]], [0, [32.88525041027444, 69.69484331872636]], [0, [5.53728075165879, 27.772403578343333]], [0, [0.7669524019010989, 3.688915796422927]], [0, [0.07790941705971591, 0.10638615361580807]]], + 'At': [[0, [3867.2700297383376, 303.2212169509859]], [0, [270.7970840945, 151.14251555680508]], [0, [34.83422183513534, 72.0152741956569]], [0, [5.916332606555011, 
29.262941710845208]], [0, [0.8371425774457082, 4.051982567189525]], [0, [0.10024708223935702, 0.16345538990921718]]], + 'Rn': [[0, [4051.137069750091, 310.9250988056561]], [0, [284.775937870393, 154.8229392338331]], [0, [36.824980911702696, 74.30862717101918]], [0, [6.309600846955326, 30.773810590825363]], [0, [0.9078056249178833, 4.433403377786259]], [0, [0.11834115297115232, 0.22290023511611973]]], + 'Fr': [[0, [4353.984794381789, 316.43748235638924]], [0, [315.0221897544036, 158.68006092193193]], [0, [41.78089597517104, 77.69002281082362]], [0, [7.324635183209355, 33.751487240198095]], [0, [1.2167680803927787, 5.5432018138923755]], [0, [0.18531646855260042, 0.5267499036883455]]], + 'Ra': [[0, [4448.350621866084, 326.7862729655624]], [0, [316.37235237572963, 162.10461390693163]], [0, [41.599655197053686, 78.94892733652733]], [0, [7.332003018858603, 34.0447122605427]], [0, [1.1859426306615157, 5.525797524492575]], [0, [0.16058344554826792, 0.4663667294265322]]], + 'Ac': [[0, [4603.153156415366, 336.6052579927967]], [0, [324.3007110669224, 165.83572180244386]], [0, [42.43724662599295, 80.83159851353724]], [0, [7.489245069769097, 34.88956929049148]], [0, [1.1874081908615182, 5.619709507030846]], [0, [0.1516443671133239, 0.4600690932932841]]], + 'Th': [[0, [4797.115103044666, 345.62274075733126]], [0, [338.2688019281645, 169.59801924262376]], [0, [44.29533540738354, 83.26599491898011]], [0, [7.831908387375935, 36.191673821065116]], [0, [1.2477414312105595, 5.98848264294172]], [0, [0.16327011497437025, 0.5198232907672228]]], + 'Pa': [[0, [4988.384347334797, 355.26683210258017]], [0, [350.79261962815326, 173.42724087767974]], [0, [45.86062629910834, 85.59403110807175]], [0, [8.127515730749716, 37.310058714683365]], [0, [1.2948293491755238, 6.333674516621873]], [0, [0.17433619558418045, 0.5704050762598124]]], + 'U': [[0, [5183.066162972329, 365.34950418100794]], [0, [362.8661720831389, 177.3353358479058]], [0, [47.296628009567854, 87.92696673647268]], [0, 
[8.393112085927632, 38.33948371626881]], [0, [1.3330953102037348, 6.665271903877122]], [0, [0.18401729827822358, 0.6114454029674808]]], + 'Np': [[0, [5384.543572863619, 375.8372854168172]], [0, [374.92172838433873, 181.32316439868376]], [0, [48.68255893505675, 90.29615221964652]], [0, [8.643048125493124, 39.32468442063093]], [0, [1.366773826787788, 6.999180874689732]], [0, [0.19260759242970482, 0.6449897137528064]]], + 'Pu': [[0, [5593.455128220382, 386.68279487022807]], [0, [387.1377374155312, 185.3845839171116]], [0, [50.05395085463852, 92.71346043659159]], [0, [8.883918591250191, 40.285846844592555]], [0, [1.3980540898103984, 7.34254653337652]], [0, [0.20029695125356592, 0.6723351335298008]]], + 'Am': [[0, [5811.506710991388, 397.9252363037504]], [0, [399.6133434666023, 189.52305406251924]], [0, [51.42829601635813, 95.1840529019412]], [0, [9.11898176976415, 41.23292843473057]], [0, [1.428182916482645, 7.698670102009173]], [0, [0.20721869574024737, 0.6944251274901461]]], + 'Cm': [[0, [6038.636135983948, 409.54741384643347]], [0, [412.3835587172814, 193.7348284033143]], [0, [52.81383174045898, 97.70910187250404]], [0, [9.34982889512178, 42.170969011372755]], [0, [1.4578869343677483, 8.06893586629329]], [0, [0.21346404640287797, 0.7119656673394167]]], + 'Bk': [[0, [6260.948816390237, 422.0410453374906]], [0, [422.91662308239006, 197.9666685432441]], [0, [53.90461251056316, 99.8974110849659]], [0, [9.602570532994145, 42.922482192514806]], [0, [1.4984353150383831, 8.5490189788099]], [0, [0.21896757528599817, 0.7251176961416461]]], + 'Cf': [[0, [6489.740134684498, 435.0218448995875]], [0, [433.34955871057997, 202.2541721796898]], [0, [54.94480645654533, 102.13481550166794]], [0, [9.83732955524915, 43.61885739445619]], [0, [1.5380653646173517, 9.027993242268069]], [0, [0.22430150549872171, 0.7368019584166986]]], + 'Es': [[0, [6726.1549694950845, 448.53297901385343]], [0, [443.7204453771275, 206.597327749617]], [0, [55.94404617815157, 104.4220836472625]], [0, 
[10.056118549065122, 44.26511722213098]], [0, [1.5773227998811425, 9.508944788748774]], [0, [0.22947752376909678, 0.7472304930317006]]], + 'Fm': [[0, [6970.136189873972, 462.57135978364494]], [0, [454.0434318759234, 210.99147916464239]], [0, [56.908789981374724, 106.75984221536478]], [0, [10.259912457405377, 44.864990614224354]], [0, [1.6164506247480044, 9.993218017831559]], [0, [0.23449400578056606, 0.7564937172127681]]], + 'Md': [[0, [7223.144242560901, 477.19533902849173]], [0, [464.3482819478374, 215.43979750119988]], [0, [57.84502682099961, 109.14985702691983]], [0, [10.449227089089558, 45.42214815564216]], [0, [1.6555508524664384, 10.481198923814517]], [0, [0.23935006172299833, 0.7646522895633704]]], + 'No': [[0, [7485.613396623628, 492.4146999962472]], [0, [474.6647744288757, 219.94073267865755]], [0, [58.76014532783936, 111.59422811133064]], [0, [10.624698106362063, 45.94171825601543]], [0, [1.6946873774494313, 10.973021567717673]], [0, [0.24404577984956427, 0.7717668700220324]]], + } diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index 29c102d6..29759726 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -6,157 +6,163 @@ def hcore(mol, *_): - """Computes guess Hamiltonian from core contributions (kinetic + nuclear + ECP). + """Computes guess Hamiltonian from core contributions (kinetic + nuclear + ECP). - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D array containing the core Hamiltonian matrix in AO basis. + """ + return scf.hf.get_hcore(mol) - Returns: - numpy ndarray: 2D array containing the core Hamiltonian matrix in AO basis. - """ - return scf.hf.get_hcore(mol) def GWH(mol, *_): - """Computes guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. 
- - Uses the empirical formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij - where K = 1.75 (from J. Chem. Phys. 1952, 20, 837). - - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). - - Returns: - numpy ndarray: 2D GWH Hamiltonian matrix in AO basis. - """ - h = hcore(mol) - S = mol.intor_symmetric('int1e_ovlp') - K = 1.75 # See J. Chem. Phys. 1952, 20, 837 - h_gwh = np.zeros_like(h) - for i in range(h.shape[0]): - for j in range(h.shape[1]): - if i != j: - h_gwh[i,j] = 0.5 * K * (h[i,i] + h[j,j]) * S[i,j] - else: - h_gwh[i,j] = h[i,i] - return h_gwh + """Computes guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. + + Uses the empirical formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij with K = 1.75. + + Reference: + M. Wolfsberg, L. Helmholtz, + "The spectra and electronic structure of the tetrahedral ions MnO4-, CrO4--, and ClO4-", + J. Chem. Phys. 20 837-843 (1952), doi:10.1063/1.1700580. + + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D GWH Hamiltonian matrix in AO basis. + """ + + h = hcore(mol).diagonal() + S = mol.intor_symmetric('int1e_ovlp') + K = 1.75 + h_gwh = 0.5 * K * (h[:,None] + h[None,:]) * S + h_gwh[np.diag_indices_from(h_gwh)] = h + return h_gwh + def SAD(mol, func): - """Computes guess Hamiltonian using Superposition of Atomic Densities (SAD). - - Constructs the Fock matrix from atomic Hartree-Fock density matrices - summed together as an initial guess for molecular calculations. - - Args: - mol (pyscf Mole): pyscf Mole object. - func (str): Exchange-correlation functional name (e.g., 'pbe', 'b3lyp'). - - Returns: - numpy ndarray: 2D Fock matrix in AO basis computed from SAD. - - Warns: - RuntimeWarning: If alpha and beta effective potentials differ for the functional. 
- """ - hc = hcore(mol) - dm = scf.hf.init_guess_by_atom(mol) - mf = dft.RKS(mol) - mf.xc = func - vhf = mf.get_veff(dm=dm) - if vhf.ndim == 2: - fock = hc + vhf - else: - fock = hc + vhf[0] - if not np.array_equal(vhf[0], vhf[1]): - msg = f'The effective potential ({func}) return different alpha and beta matrix components from atomicHF DM' - warnings.warn(msg, RuntimeWarning, stacklevel=2) - return fock + """Computes guess Hamiltonian using Superposition of Atomic Densities (SAD). + + Constructs the Fock matrix from atomic Hartree-Fock density matrices + summed together as an initial guess for molecular calculations. + + Args: + mol (pyscf Mole): pyscf Mole object. + func (str): Exchange-correlation functional name (e.g., 'pbe', 'b3lyp'). + + Returns: + numpy ndarray: 2D Fock matrix in AO basis computed from SAD. + + Warns: + RuntimeWarning: If alpha and beta effective potentials differ for the functional. + """ + hc = hcore(mol) + dm = scf.hf.init_guess_by_atom(mol) + mf = dft.RKS(mol) + mf.xc = func + vhf = mf.get_veff(dm=dm) + if vhf.ndim == 2: + fock = hc + vhf + else: + fock = hc + vhf[0] + if not np.array_equal(vhf[0], vhf[1]): + msg = f'The effective potential ({func}) returned different alpha and beta matrix components from atomicHF DM' + warnings.warn(msg, RuntimeWarning, stacklevel=2) + return fock + def SAP(mol, *_): - """Computes guess Hamiltonian using Superposition of Atomic Potentials (SAP). + """Computes guess Hamiltonian using Superposition of Atomic Potentials (SAP). - Constructs initial Hamiltonian from kinetic energy plus summed atomic potentials. + Constructs initial Hamiltonian from kinetic energy plus summed atomic potentials. - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). + + Returns: + numpy ndarray: 2D Hamiltonian matrix (T + V_SAP) in AO basis. 
+ """ + mf = dft.RKS(mol) + vsap = mf.get_vsap() + t = mol.intor_symmetric('int1e_kin') + fock = t + vsap + return fock - Returns: - numpy ndarray: 2D Hamiltonian matrix (T + V_SAP) in AO basis. - """ - mf = dft.RKS(mol) - vsap = mf.get_vsap() - t = mol.intor_symmetric('int1e_kin') - fock = t + vsap - return fock def LB(mol, *_): - """Computes guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. + """Computes guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. + + Uses auxiliary basis representation optimized for Hartree-Fock calculations. - Uses auxiliary basis representation optimized for Hartree-Fock calculations. + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + Returns: + numpy ndarray: 2D effective Hamiltonian matrix from LB2020 model in AO basis. + """ + return LB20(parameters='HF').Heff(mol) - Returns: - numpy ndarray: 2D effective Hamiltonian matrix from LB2020 model in AO basis. - """ - return LB20(parameters='HF').Heff(mol) def LB_HFS(mol, *_): - """Computes guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. + """Computes guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. + + Uses auxiliary basis representation optimized for Hartree-Fock-Slater calculations. - Uses auxiliary basis representation optimized for Hartree-Fock-Slater calculations. + Args: + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + Returns: + numpy ndarray: 2D effective Hamiltonian matrix from LB2020-HFS model in AO basis. + """ + return LB20(parameters='HFS').Heff(mol) - Returns: - numpy ndarray: 2D effective Hamiltonian matrix from LB2020-HFS model in AO basis. 
- """ - return LB20(parameters='HFS').Heff(mol) def solveF(mol, fock): - """Solves generalized eigenvalue problem FC = SCε for the Fock/Hamiltonian matrix. + """Solves generalized eigenvalue problem FC = SCε for the Fock/Hamiltonian matrix. - Args: - mol (pyscf Mole): pyscf Mole object. - fock (numpy ndarray): 2D Fock or Hamiltonian matrix in AO basis. + Args: + mol (pyscf Mole): pyscf Mole object. + fock (numpy ndarray): 2D Fock or Hamiltonian matrix in AO basis. - Returns: - tuple: (eigenvalues, eigenvectors) where: - - eigenvalues: 1D array of orbital energies - - eigenvectors: 2D array of MO coefficients (columns are MOs) - """ - s1e = mol.intor_symmetric('int1e_ovlp') - return scipy.linalg.eigh(fock, s1e) + Returns: + tuple: (eigenvalues, eigenvectors) where: + - eigenvalues: 1D array of orbital energies + - eigenvectors: 2D array of MO coefficients (columns are MOs) + """ + s1e = mol.intor_symmetric('int1e_ovlp') + return scipy.linalg.eigh(fock, s1e) def get_guess(arg): - """Returns guess Hamiltonian function by name. - - Args: - arg (str): Guess method name. Available options: - - 'core': Core Hamiltonian (H_core) - - 'sad': Superposition of Atomic Densities - - 'sap': Superposition of Atomic Potentials - - 'gwh': Generalized Wolfsberg-Helmholtz - - 'lb': Laikov-Briling 2020 (HF parameters) - - 'lb-hfs': Laikov-Briling 2020 (HFS parameters) - - 'huckel': Extended Hückel method - - Returns: - callable: Guess Hamiltonian function with signature f(mol, xc) -> numpy.ndarray. - - Raises: - RuntimeError: If the specified guess method is not available. - """ - arg = arg.lower() - if arg not in guesses_dict: - raise RuntimeError(f'Unknown guess. Available guesses: {list(guesses_dict.keys())}') - return guesses_dict[arg] + """Returns guess Hamiltonian function by name. + + Args: + arg (str): Guess method name. 
Available options: + - 'core': Core Hamiltonian (H_core) + - 'sad': Superposition of Atomic Densities + - 'sap': Superposition of Atomic Potentials + - 'gwh': Generalized Wolfsberg-Helmholtz + - 'lb': Laikov-Briling 2020 (HF parameters) + - 'lb-hfs': Laikov-Briling 2020 (HFS parameters) + - 'huckel': Extended Hückel method + + Returns: + callable: Guess Hamiltonian function with signature f(mol, xc) -> numpy.ndarray. + + Raises: + RuntimeError: If the specified guess method is not available. + """ + arg = arg.lower() + if arg not in guesses_dict: + raise RuntimeError(f'Unknown guess. Available guesses: {list(guesses_dict.keys())}') + return guesses_dict[arg] def check_nelec(nelec, nao): @@ -205,29 +211,29 @@ def get_occ(e, nelec, spin): def get_dm(v, nelec, spin): - """Constructs density matrix from occupied molecular orbitals. - - Args: - v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. - nelec (tuple): Number of (alpha, beta) electrons. - spin (int or None): Spin multiplicity. If None, assumes closed-shell (RHF). - - Returns: - numpy ndarray: Density matrix in AO basis. - - Closed-shell: 2D array (nao, nao) - - Open-shell: 3D array (2, nao, nao) for alpha and beta - """ - - check_nelec(nelec, len(v)) - if spin is None: - nocc = nelec[0] - dm = v[:,:nocc] @ v[:,:nocc].T - return 2.0*dm - else: - nocc = nelec - dm0 = v[:,:nocc[0]] @ v[:,:nocc[0]].T - dm1 = v[:,:nocc[1]] @ v[:,:nocc[1]].T - return np.array((dm0,dm1)) + """Constructs density matrix from occupied molecular orbitals. + + Args: + v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Spin multiplicity. If None, assumes closed-shell (RHF). + + Returns: + numpy ndarray: Density matrix in AO basis. 
+ - Closed-shell: 2D array (nao, nao) + - Open-shell: 3D array (2, nao, nao) for alpha and beta + """ + + check_nelec(nelec, len(v)) + if spin is None: + nocc = nelec[0] + dm = v[:,:nocc] @ v[:,:nocc].T + return 2.0*dm + else: + nocc = nelec + dm0 = v[:,:nocc[0]] @ v[:,:nocc[0]].T + dm1 = v[:,:nocc[1]] @ v[:,:nocc[1]].T + return np.array((dm0,dm1)) ############################################################################### diff --git a/tests/test_spahm.py b/tests/test_spahm.py index f405ca3e..6ad7c6ac 100755 --- a/tests/test_spahm.py +++ b/tests/test_spahm.py @@ -6,6 +6,16 @@ from qstack.spahm import compute_spahm +def test_spahm_GWH(): + path = os.path.dirname(os.path.realpath(__file__)) + mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'minao', charge=1, spin=1) + R = compute_spahm.get_spahm_representation(mol, 'gwh') + true_R = np.array([[-33.02835203, -8.92909895, -8.00935971, -7.51145492, -7.32962602], + [-33.02835203, -8.92909895, -8.00935971, -7.51145492, 0. ]]) + assert(R.shape == (2,5)) + assert(np.allclose(R, true_R)) + + def test_spahm_huckel(): path = os.path.dirname(os.path.realpath(__file__)) mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) @@ -13,7 +23,7 @@ def test_spahm_huckel(): true_R = np.array([[-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531], [-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531]]) assert(R.shape == (2,5)) - assert(np.abs(np.sum(R-true_R)) < 1e-05) + assert(np.allclose(R, true_R)) def test_spahm_LB(): @@ -23,7 +33,7 @@ def test_spahm_LB(): true_R = np.array( [[-18.80209878, -1.28107468, -0.79949967, -0.63587071, -0.57481672], [-18.80209878, -1.28107468, -0.79949967, -0.63587071, 0. 
]]) assert(R.shape == (2,5)) - assert(np.abs(np.sum(R-true_R)) < 1e-05) + assert(np.allclose(R, true_R)) def test_spahm_LB_ecp(): @@ -68,6 +78,7 @@ def test_generate_reps(): if __name__ == '__main__': + test_spahm_GWH() test_spahm_huckel() test_spahm_LB() test_spahm_LB_ecp() From 9530f5b59d5d185756889a0ae70be10065cd261b Mon Sep 17 00:00:00 2001 From: Ksenia Date: Fri, 7 Nov 2025 15:30:42 +0100 Subject: [PATCH 14/23] Update docs --- qstack/basis_opt/__init__.py | 2 + qstack/basis_opt/basis_tools.py | 7 +- qstack/basis_opt/opt.py | 30 +-- qstack/c2mio.py | 12 +- qstack/compound.py | 28 +-- qstack/constants.py | 5 +- qstack/equio.py | 101 ++++----- qstack/fields/__init__.py | 4 + qstack/fields/decomposition.py | 101 ++++++--- qstack/fields/density2file.py | 5 +- qstack/fields/dm.py | 11 +- qstack/fields/dori.py | 123 +++++------ qstack/fields/excited.py | 69 +++--- qstack/fields/hf_otpd.py | 6 +- qstack/fields/hirshfeld.py | 19 +- qstack/fields/moments.py | 37 ++-- qstack/mathutils/__init__.py | 4 +- qstack/mathutils/array.py | 45 +++- qstack/mathutils/fps.py | 6 +- qstack/mathutils/matrix.py | 2 + qstack/mathutils/rotation_matrix.py | 3 +- qstack/mathutils/wigner.py | 6 +- qstack/mathutils/xyz_integrals_float.py | 6 +- qstack/mathutils/xyz_integrals_sym.py | 6 +- qstack/orcaio.py | 25 ++- qstack/qml/__init__.py | 2 + qstack/qml/b2r2.py | 120 ++++++++--- qstack/qml/slatm.py | 228 +++++++++++++------- qstack/regression/__init__.py | 4 +- qstack/regression/condition.py | 29 +-- qstack/regression/cross_validate_results.py | 42 ++-- qstack/regression/final_error.py | 32 +-- qstack/regression/global_kernels.py | 7 +- qstack/regression/hyperparameters.py | 38 ++-- qstack/regression/kernel.py | 12 +- qstack/regression/kernel_utils.py | 34 ++- qstack/regression/local_kernels.py | 10 +- qstack/regression/oos.py | 27 +-- qstack/regression/parser.py | 18 +- qstack/regression/regression.py | 37 ++-- qstack/reorder.py | 5 +- qstack/spahm/LB2020guess.py | 163 ++++++++++---- 
qstack/spahm/__init__.py | 3 +- qstack/spahm/compute_spahm.py | 38 ++-- qstack/spahm/guesses.py | 111 +++++----- qstack/spahm/rho/Dmatrix.py | 9 +- qstack/spahm/rho/__init__.py | 1 + qstack/spahm/rho/__main__.py | 3 + qstack/spahm/rho/atom.py | 2 + qstack/spahm/rho/atomic_density.py | 6 +- qstack/spahm/rho/bond.py | 2 + qstack/spahm/rho/bond_selected.py | 5 +- qstack/spahm/rho/compute_rho_spahm.py | 24 ++- qstack/spahm/rho/dmb_rep_atom.py | 81 +++++-- qstack/spahm/rho/dmb_rep_bond.py | 141 ++++++++++-- qstack/spahm/rho/lowdin.py | 4 +- qstack/spahm/rho/parser.py | 2 + qstack/spahm/rho/sym.py | 106 ++++++++- qstack/spahm/rho/utils.py | 105 ++++++--- qstack/tools.py | 5 +- tests/test_compound.py | 2 +- tests/test_excited.py | 16 +- tests/test_fitting.py | 6 +- tests/test_moments.py | 0 64 files changed, 1392 insertions(+), 751 deletions(-) mode change 100644 => 100755 tests/test_moments.py diff --git a/qstack/basis_opt/__init__.py b/qstack/basis_opt/__init__.py index 7f71607c..85a9221e 100644 --- a/qstack/basis_opt/__init__.py +++ b/qstack/basis_opt/__init__.py @@ -1,2 +1,4 @@ +"""Basis set optimization module.""" + from . import opt from . import basis_tools diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index bcb80208..8afb57e6 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -1,3 +1,5 @@ +"""Utility functions for basis set manipulation.""" + import copy import numpy as np from pyscf import df, dft @@ -39,10 +41,9 @@ def gradient_mol(nexp, newbasis, moldata): Returns: tuple: A tuple containing: - - E (float): Loss function value. - - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents. + - E (float): Loss function value. + - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents. 
""" - mol = moldata['mol' ] rho = moldata['rho' ] coords = moldata['coords' ] diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index 2e097dc0..3cfd9cbc 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -1,4 +1,7 @@ +"""Basis set optimization routines and command-line interface.""" + import sys +import argparse from ast import literal_eval import numpy as np import scipy.optimize @@ -24,8 +27,6 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= Dictionary containing the optimized basis. """ - - def energy(x): """Compute total loss function (fitting error) for given exponents. @@ -42,6 +43,7 @@ def energy(x): E += qbbt.energy_mol(newbasis, m) return E + def gradient(x): """Compute total loss function (fitting error) and gradient for given exponents. @@ -50,8 +52,8 @@ def gradient(x): Returns: tuple: A tuple containing: - - E (float): Loss function value. - - dE_dx (numpy.ndarray): Gradient with respect to log(exponents). + - E (float): Loss function value. + - dE_dx (numpy.ndarray): Gradient with respect to log(exponents). """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) @@ -135,18 +137,18 @@ def make_moldata(fname): Args: fname (str or dict): Path to .npz file or dictionary containing molecular structure, - grid coordinates and weights, and reference density evaluated on it. + grid coordinates and weights, and reference density evaluated on it. Returns: dict: Dictionary containing: - mol (pyscf Mole): pyscf Mole object. - rho (numpy.ndarray): Reference density values on the grid. - coords (numpy.ndarray): Grid coordinates. - weights (numpy.ndarray): Grid weights. - self (float): Integral of the squared reference density. - idx (numpy.ndarray): Basis function indices for each AO. - centers (list): Atomic center indices for each AO. - distances (numpy.ndarray): Squared distances from each atom to each grid point. + mol (pyscf Mole): pyscf Mole object. 
+ rho (numpy.ndarray): Reference density values on the grid. + coords (numpy.ndarray): Grid coordinates. + weights (numpy.ndarray): Grid weights. + self (float): Integral of the squared reference density. + idx (numpy.ndarray): Basis function indices for each AO. + centers (list): Atomic center indices for each AO. + distances (numpy.ndarray): Squared distances from each atom to each grid point. """ if isinstance(fname, str): rho_data = np.load(fname) @@ -232,8 +234,6 @@ def make_moldata(fname): def main(): """Main function for basis set optimization command-line interface.""" - import argparse - parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.') parser.add_argument('--elements', type=str, dest='elements', nargs='+', help='elements for optimization') parser.add_argument('--basis', type=str, dest='basis', nargs='+', help='initial df bases', required=True) diff --git a/qstack/c2mio.py b/qstack/c2mio.py index 78df4bd4..1c0dbc5b 100644 --- a/qstack/c2mio.py +++ b/qstack/c2mio.py @@ -1,3 +1,5 @@ +"""Converter from cell2mol Cell objects to PySCF Mole.""" + import sys import os import io @@ -14,10 +16,10 @@ def get_cell2mol_xyz(mol): Returns: tuple: A tuple containing: - - xyz (str): XYZ coordinate string. - - charge (int): Total charge of the molecule. - - spin (int): Number of unpaired electrons of the molecule (multiplicity - 1) - for molecules and None for ligands. + - xyz (str): XYZ coordinate string. + - charge (int): Total charge of the molecule. + - spin (int): Number of unpaired electrons of the molecule (multiplicity - 1) + for molecules and None for ligands. """ f = io.StringIO() sys.stdout, stdout = f, sys.stdout @@ -37,7 +39,7 @@ def get_cell(fpath, workdir='.'): Args: fpath (str): Path to the input file (.cell or .cif). workdir (str): Directory to read / write .cell file and logs if a .cif file - is provided. Defaults to '.'. + is provided. Defaults to '.'. Returns: cell2mol.unitcell: Unit cell object. 
diff --git a/qstack/compound.py b/qstack/compound.py index d061680d..a0ca482f 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -1,3 +1,5 @@ +"""Molecular structure parsing and manipulation.""" + import json import re import warnings @@ -5,7 +7,7 @@ from pyscf import gto, data from qstack import constants from qstack.reorder import get_mrange -from qstack.mathutils.array import vstack_padding +from qstack.mathutils.array import stack_padding from qstack.mathutils.rotation_matrix import rotate_euler @@ -22,6 +24,7 @@ # matches a floating-point number in any format python reads _re_float = re.compile(r'[+-]?[0-9]*?([0-9]\.|\.[0-9]|[0-9])[0-9]*?([eEdD][+-]?[0-9]+)?') + def xyz_comment_line_parser(line): """Reads the 'comment' line of a XYZ file and tries to infer its meaning. @@ -105,7 +108,6 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit Raises: RuntimeError: If units are not recognized or if minao basis requires ECP for heavy atoms. """ - if '\n' in inp: molxyz = gto.fromstring(inp) else: @@ -180,7 +182,6 @@ def mol_to_xyz(mol, fout, fmt="xyz"): Raises: NotImplementedError: If fmt is not "xyz". """ - fmt = fmt.lower() output = [] if fmt == "xyz": @@ -267,7 +268,6 @@ def fragment_partitioning(fragments, prop_atom_inp, normalize=True): Returns: list or numpy.ndarray: Contribution of each fragment. Returns list if input was list, array otherwise. """ - props_atom = prop_atom_inp if type(prop_atom_inp) is list else [prop_atom_inp] props_frag = [] @@ -312,9 +312,9 @@ def singleatom_basis_enumerator(basis): Returns: tuple: A tuple containing: - - l_per_bas (list): Angular momentum quantum number l for each basis function. - - n_per_bas (list): Radial function counter n (starting at 0) for each basis function. - - ao_starts (list): Starting index in AO array for each basis function. + - l_per_bas (list): Angular momentum quantum number l for each basis function. 
+ - n_per_bas (list): Radial function counter n (starting at 0) for each basis function. + - ao_starts (list): Starting index in AO array for each basis function. """ ao_starts = [] l_per_bas = [] @@ -350,14 +350,14 @@ def basis_flatten(mol, return_both=True): Returns: - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: - - 0: atom index - - 1: angular momentum quantum number l - - 2: magnetic quantum number m + - 0: atom index + - 1: angular momentum quantum number l + - 2: magnetic quantum number m If return_both is True, also returns: - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: - - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian - - j: AO index - - k: radial function index (padded with zeros if necessary) + - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian + - j: AO index + - k: radial function index (padded with zeros if necessary) """ x = [] y = np.zeros((3, mol.nao), dtype=int) @@ -377,7 +377,7 @@ def basis_flatten(mol, return_both=True): y[2,i:i+msize*n] = [*get_mrange(l)]*n i += msize*n if return_both: - x = vstack_padding(x).transpose((1,0,2)) + x = stack_padding(x).transpose((1,0,2)) return y, x else: return y diff --git a/qstack/constants.py b/qstack/constants.py index 74b0c6e4..b5d7176d 100644 --- a/qstack/constants.py +++ b/qstack/constants.py @@ -1,10 +1,9 @@ -''' +""" NIST physical constants and unit conversion https://physics.nist.gov/cuu/Constants/ https://physics.nist.gov/cuu/Constants/Table/allascii.txt -''' - +""" # Constants SPEED_LIGHT = 299792458.0 PLANCK = 6.62607004e-34 diff --git a/qstack/equio.py b/qstack/equio.py index 973addfd..e56a1372 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -1,11 +1,15 @@ +"""Equilibrium geometry and molecular structure I/O utilities.""" + +import itertools from functools import reduce -import numpy as np from types import SimpleNamespace +import numpy as np from pyscf import data import 
metatensor from qstack.reorder import get_mrange from qstack.compound import singleatom_basis_enumerator + vector_label_names = SimpleNamespace( tm = ['o3_lambda', 'center_type'], block_prop = ['radial_channel'], @@ -48,8 +52,9 @@ def _get_tsize(tensor): """ return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys]) + def _labels_to_array(labels): - """Represents a set of metatensor labels as an array of the labels, using custom dtypes. + """Represents a set of metatensor labels as an array. Args: labels (metatensor.Labels): Labels object. @@ -58,22 +63,23 @@ def _labels_to_array(labels): numpy.ndarray: 1D structured array containing the same labels. """ values = labels.values - dtype = [ (name,values.dtype) for name in labels.names] + dtype = [(name, values.dtype) for name in labels.names] return values.view(dtype=dtype).reshape(values.shape[0]) + def vector_to_tensormap(mol, c): - """Transform a vector into a tensor map. Used by :py:func:`array_to_tensormap`. + """Transforms an vector into a tensor map. + + Each element of the vector corresponds to an atomic orbital of the molecule. Args: mol (pyscf.gto.Mole): pyscf Mole object. - c (numpy.ndarray): Vector to transform. + c (numpy.ndarray): vector to transform. Returns: metatensor.TensorMap: Tensor map representation of the vector. 
""" - - atom_charges = list(mol.atom_charges()) - elements = sorted(set(atom_charges)) + atom_charges = mol.atom_charges() tm_label_vals = [] block_prop_label_vals = {} @@ -87,18 +93,18 @@ def vector_to_tensormap(mol, c): llists = _get_llist(mol) - for q in elements: + for q, samples_count in zip(*np.unique(atom_charges, return_counts=True), strict=True): llist = llists[q] + block_samp_label_vals_q = np.where(atom_charges==q)[0].reshape(-1,1) for l in sorted(set(llist)): label = (l, q) tm_label_vals.append(label) - samples_count = atom_charges.count(q) components_count = 2*l+1 properties_count = llist.count(l) blocks[label] = np.zeros((samples_count, components_count, properties_count)) block_comp_label_vals[label] = np.arange(-l, l+1).reshape(-1,1) block_prop_label_vals[label] = np.arange(properties_count).reshape(-1,1) - block_samp_label_vals[label] = np.where(atom_charges==q)[0].reshape(-1,1) + block_samp_label_vals[label] = block_samp_label_vals_q tm_labels = metatensor.Labels(vector_label_names.tm, np.array(tm_label_vals)) @@ -108,7 +114,7 @@ def vector_to_tensormap(mol, c): # Fill in the blocks - iq = dict.fromkeys(elements, 0) + iq = dict.fromkeys(llists.keys(), 0) i = 0 for q in atom_charges: if llists[q]==sorted(llists[q]): @@ -141,11 +147,11 @@ def vector_to_tensormap(mol, c): def tensormap_to_vector(mol, tensor): - """Transform a tensor map into a vector. Used by :py:func:`tensormap_to_array`. + """Transform a tensor map into a vector. Args: mol (pyscf.gto.Mole): pyscf Mole object. - tensor (metatensor.TensorMap): Tensor to transform. + tensor (metatensor.TensorMap): tensor to transform. Returns: numpy.ndarray: 1D array (vector) representation. @@ -153,7 +159,6 @@ def tensormap_to_vector(mol, tensor): Raises: RuntimeError: If tensor size does not match mol.nao. 
""" - nao = _get_tsize(tensor) if mol.nao != nao: raise RuntimeError(f'Tensor size mismatch ({nao} instead of {mol.nao})') @@ -178,62 +183,51 @@ def tensormap_to_vector(mol, tensor): def matrix_to_tensormap(mol, dm): - """Transform a matrix into a tensor map. Used by :py:func:`array_to_tensormap`. + """Transforms a matrix into a tensor map. + + Each element of the matrix corresponds to a pair of atomic orbitals. Args: mol (pyscf.gto.Mole): pyscf Mole object. - dm (numpy.ndarray): Matrix to transform. + dm (numpy.ndarray): matrix to transform. Returns: metatensor.TensorMap: Tensor map representation of the matrix. """ - - def pairs(list1, list2): - """Generate all pairs from two lists. - - Args: - list1 (list): First list. - list2 (list): Second list. - - Returns: - numpy.ndarray: Array of all (i,j) pairs. - """ - return np.array([(i,j) for i in list1 for j in list2]) - - atom_charges = list(mol.atom_charges()) - elements = sorted(set(atom_charges)) + atom_charges = mol.atom_charges() + elements, counts = np.unique(atom_charges, return_counts=True) + counts = dict(zip(elements, counts, strict=True)) + element_indices = {q: np.where(atom_charges==q)[0] for q in elements} + llists = _get_llist(mol) tm_label_vals = [] block_prop_label_vals = {} block_samp_label_vals = {} block_comp_label_vals = {} - blocks = {} - llists = _get_llist(mol) # Create labels for TensorMap, lables for blocks, and empty blocks for q1 in elements: for q2 in elements: + samples_count1 = counts[q1] + samples_count2 = counts[q2] llist1 = llists[q1] llist2 = llists[q2] + block_samp_label_vals_q1q2 = np.array([*itertools.product(element_indices[q1], element_indices[q2])]) for l1 in sorted(set(llist1)): + components_count1 = 2*l1+1 + properties_count1 = llist1.count(l1) for l2 in sorted(set(llist2)): - label = (l1, l2, q1, q2) - tm_label_vals.append(label) - - samples_count1 = atom_charges.count(q1) - components_count1 = 2*l1+1 - properties_count1 = llist1.count(l1) - - samples_count2 = 
atom_charges.count(q2) components_count2 = 2*l2+1 properties_count2 = llist2.count(l2) + label = (l1, l2, q1, q2) + tm_label_vals.append(label) blocks[label] = np.zeros((samples_count1*samples_count2, components_count1, components_count2, properties_count1*properties_count2)) block_comp_label_vals[label] = (np.arange(-l1, l1+1).reshape(-1,1), np.arange(-l2, l2+1).reshape(-1,1)) - block_prop_label_vals[label] = pairs(np.arange(properties_count1), np.arange(properties_count2)) - block_samp_label_vals[label] = pairs(np.where(atom_charges==q1)[0],np.where(atom_charges==q2)[0]) + block_prop_label_vals[label] = np.array([*itertools.product(np.arange(properties_count1), np.arange(properties_count2))]) + block_samp_label_vals[label] = block_samp_label_vals_q1q2 tm_labels = metatensor.Labels(matrix_label_names.tm, np.array(tm_label_vals)) @@ -309,11 +303,11 @@ def pairs(list1, list2): def tensormap_to_matrix(mol, tensor): - """Transform a tensor map into a matrix. Used by :py:func:`tensormap_to_array`. + """Transform a tensor map into a matrix. Args: mol (pyscf.gto.Mole): pyscf Mole object. - tensor (metatensor.TensorMap): Tensor to transform. + tensor (metatensor.TensorMap): tensor to transform. Returns: numpy.ndarray: 2D array (matrix) representation. @@ -321,7 +315,6 @@ def tensormap_to_matrix(mol, tensor): Raises: RuntimeError: If tensor size does not match mol.nao * mol.nao. 
""" - nao2 = _get_tsize(tensor) if mol.nao*mol.nao != nao2: raise RuntimeError(f'Tensor size mismatch ({nao2} instead of {mol.nao*mol.nao})') @@ -335,17 +328,14 @@ def tensormap_to_matrix(mol, tensor): il1 = dict.fromkeys(range(max(llist1) + 1), 0) for l1 in llist1: for m1 in get_mrange(l1): - i2 = 0 for iat2, q2 in enumerate(atom_charges): llist2 = llists[q2] il2 = dict.fromkeys(range(max(llist2) + 1), 0) for l2 in llist2: - block = tensor.block(o3_lambda1=l1, o3_lambda2=l2, center_type1=q1, center_type2=q2) id_samp = block.samples.position((iat1, iat2)) id_prop = block.properties.position((il1[l1], il2[l2])) - for m2 in get_mrange(l2): id_comp1 = block.components[0].position((m1,)) id_comp2 = block.components[1].position((m2,)) @@ -354,12 +344,14 @@ def tensormap_to_matrix(mol, tensor): il2[l2] += 1 i1 += 1 il1[l1] += 1 - return dm + def array_to_tensormap(mol, v): """Transform an array into a tensor map. + Wrapper for vector_to_tensormap and matrix_to_tensormap. + Args: mol (pyscf.gto.Mole): pyscf Mole object. v (numpy.ndarray): Array to transform. Can be a vector (1D) or matrix (2D). @@ -381,6 +373,8 @@ def array_to_tensormap(mol, v): def tensormap_to_array(mol, tensor): """Transform a tensor map into an array. + Wrapper for tensormap_to_vector and tensormap_to_matrix. + Args: mol (pyscf.gto.Mole): pyscf Mole object. tensor (metatensor.TensorMap): Tensor to transform. @@ -391,7 +385,6 @@ def tensormap_to_array(mol, tensor): Raises: RuntimeError: If tensor key names don't match expected format. """ - if tensor.keys.names==vector_label_names.tm: return tensormap_to_vector(mol, tensor) elif tensor.keys.names==matrix_label_names.tm: @@ -412,7 +405,6 @@ def join(tensors): Raises: RuntimeError: If tensors have different label names. 
""" - if not all(tensor.keys.names==tensors[0].keys.names for tensor in tensors): raise RuntimeError('Cannot merge tensors with different label names') tm_label_vals = set().union(*[set(_labels_to_array(tensor.keys)) for tensor in tensors]) @@ -459,12 +451,11 @@ def split(tensor): Returns: list or dict: Collection of metatensor.TensorMap objects, one per molecule. - Returns list if molecule indices are continuous, dict otherwise. + Returns list if molecule indices are continuous, dict otherwise. Raises: RuntimeError: If tensor does not contain multiple molecules. """ - if tensor.sample_names[0]!=_molid_name: raise RuntimeError('Tensor does not seem to contain several molecules') diff --git a/qstack/fields/__init__.py b/qstack/fields/__init__.py index 78c8a246..4ae39111 100644 --- a/qstack/fields/__init__.py +++ b/qstack/fields/__init__.py @@ -1,3 +1,7 @@ +"""Molecular field analysis module. + +Provides tools for density decomposition, molecular fields, and property computation.""" + from qstack.fields import decomposition from qstack.fields import dm from qstack.fields import hf_otpd diff --git a/qstack/fields/decomposition.py b/qstack/fields/decomposition.py index cbfaf65d..38c91179 100644 --- a/qstack/fields/decomposition.py +++ b/qstack/fields/decomposition.py @@ -1,9 +1,12 @@ +"""Density matrix decomposition onto auxiliary basis sets.""" + import numpy as np import scipy from pyscf import scf from qstack import compound from . import moments + def decompose(mol, dm, auxbasis): """Fit molecular density onto an atom-centered basis. @@ -13,57 +16,60 @@ def decompose(mol, dm, auxbasis): auxbasis (string / pyscf basis dictionary): Atom-centered basis to decompose on. Returns: - A copy of the pyscf Mole object with the auxbasis basis in a pyscf Mole object, and a 1D numpy array containing the decomposition coefficients. 
+ Tuple containing: + - copy of the pyscf Mole object with the auxbasis basis in a pyscf Mole object, + - 1D numpy array containing the decomposition coefficients. """ - auxmol = compound.make_auxmol(mol, auxbasis) _S, eri2c, eri3c = get_integrals(mol, auxmol) c = get_coeff(dm, eri2c, eri3c) return auxmol, c + def get_integrals(mol, auxmol): - """Computes overlap and 2-/3-centers ERI matrices. + """Computes overlap integrals and 2-/3-centers ERI matrices. Args: mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. - auxmol (pyscf Mole): pyscf Mole object holding molecular structure, composition and the auxiliary basis set. + auxmol (pyscf Mole): pyscf Mole object of the same molecule with an auxiliary basis set. Returns: - Three numpy ndarray containing: the overlap matrix, the 2-centers ERI matrix, and the 3-centers ERI matrix respectively. + Tuple of three numpy ndarray containing: + - overlap matrix (auxmol.nao,auxmol.nao) for the auxiliary basis, + - 2-centers ERI matrix (auxmol.nao,auxmol.nao) for the auxiliary basis, + - 3-centers ERI matrix (mol.nao,mol.nao,auxmol.nao) between AO and auxiliary basis. """ - - # Get overlap integral in the auxiliary basis S = auxmol.intor('int1e_ovlp_sph') - - # Concatenate standard and auxiliary basis set into a pmol object pmol = mol + auxmol - - # Compute 2- and 3-centers ERI integrals using the concatenated mol object eri2c = auxmol.intor('int2c2e_sph') eri3c = pmol.intor('int3c2e_sph', shls_slice=(0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas)) eri3c = eri3c.reshape(mol.nao_nr(), mol.nao_nr(), -1) - return S, eri2c, eri3c -def get_self_repulsion(mol, dm): - """Computes the Einstein summation of the Coulumb matrix and the density matrix. + +def get_self_repulsion(mol_or_mf, dm): + r"""Computes the self-repulsion of the density. + + \int \int \rho_DM(r1) 1/|r1-r2| \rho_DM(r2) dr1 dr2 Args: - mol (pyscf Mole): pyscf Mole object. - dm (numpy ndarray): Density matrix. 
+ mol_or_mf (pyscf Mole or SCF): pyscf Mole or Mean Field object. + dm (2D numpy ndarray): Density matrix. Returns: - A nummpy ndarray result of the Einstein summation of the J matrix and the Density matrix. + float: Self-repulsion energy (a.u). """ - try: - j, _k = mol.get_jk() + j, _k = mol_or_mf.get_jk() except AttributeError: - j, _k = scf.hf.get_jk(mol, dm) + j, _k = scf.hf.get_jk(mol_or_mf, dm) return np.einsum('ij,ij', j, dm) -def decomposition_error(self_repulsion, c, eri2c): - """Computes the decomposition error for density fitting. + +def optimal_decomposition_error(self_repulsion, c, eri2c): + r"""Computes the decomposition error for optimal density fitting. + + \int \int \rho_DM(r1) 1/|r1-r2| \rho_DF(r2) dr1 dr2 Args: self_repulsion (float): Self-repulsion energy from the original density matrix. @@ -72,9 +78,37 @@ def decomposition_error(self_repulsion, c, eri2c): Returns: float: The decomposition error. + + Notes: + - It is assumed that `c` are the optimal coefficients obtained from the density matrix. + - `self_repulsion` can be set to 0 to avoid expensive computations when only the relative error is needed. """ return self_repulsion - c @ eri2c @ c + +def decomposition_error(self_repulsion, c, eri2c, eri3c, dm): + r"""Computes the decomposition error for optimal density fitting. + + \int \int \rho_DM(r1) 1/|r1-r2| \rho_DF(r2) dr1 dr2 + + Args: + self_repulsion (float): Self-repulsion energy from the original density matrix. + c (numpy ndarray): 1D array of density expansion coefficients. + eri2c (numpy ndarray): 2D array of 2-center ERIs. + eri3c (numpy ndarray): 3D array of 3-center ERIs. + dm (numpy ndarray): Density matrix. + + Returns: + float: The decomposition error. + + Notes: + - If `c` are the optimal coefficients obtained from the density matrix, `optimal_decomposition_error()` can be used instead. + - `self_repulsion` can be set to 0 to avoid expensive computations when only the relative error is needed. 
+ """ + projection = np.einsum('ijp,ij->p', eri3c, dm) + return self_repulsion + c @ eri2c @ c - 2.0 * c @ projection + + def get_coeff(dm, eri2c, eri3c, slices=None): """Computes the density expansion coefficients. @@ -82,12 +116,14 @@ def get_coeff(dm, eri2c, eri3c, slices=None): dm (numpy ndarray): Density matrix. eri2c (numpy ndarray): 2-centers ERI matrix. eri3c (numpy ndarray): 3-centers ERI matrix. - slices (optional numpy ndarray): assume that eri2c is bloc-diagonal, by giving the boundaries of said blocks + slices (optional numpy ndarray): Assume that eri2c is bloc-diagonal, by giving the boundaries of said blocks. Returns: A numpy ndarray containing the expansion coefficients of the density onto the auxiliary basis. - """ + Raises: + RuntimeError: If the `slices` argument is incorrectly formatted or inconsistent with the auxiliary basis size. + """ # Compute the projection of the density onto auxiliary basis using a Coulomb metric projection = np.einsum('ijp,ij->p', eri3c, dm) @@ -105,16 +141,17 @@ def get_coeff(dm, eri2c, eri3c, slices=None): return c + def _get_inv_metric(mol, metric, v): """Computes the inverse metric applied to a vector. Args: - mol (pyscf Mole): pyscf Mole object. - metric (str or numpy ndarray): Metric type ('unit', 'overlap', 'coulomb') or a metric matrix. - v (numpy ndarray): Vector to apply the inverse metric to. + mol (pyscf Mole): pyscf Mole object. + metric (str or numpy ndarray): Metric type ('unit', 'overlap', 'coulomb') or a metric matrix. + v (numpy ndarray): Vector to apply the inverse metric to. Returns: - numpy ndarray: Result of applying the inverse metric to the input vector. + numpy ndarray: Result of applying the inverse metric to the input vector. """ if isinstance(metric, str): metric = metric.lower() @@ -130,7 +167,7 @@ def _get_inv_metric(mol, metric, v): def correct_N_atomic(mol, N, c0, metric='u'): - """Corrects decomposition coefficients to match the target electron count per atom. 
+ """Corrects decomposition coefficients to match the target number of electrons per atom. Uses Lagrange multipliers to enforce the correct number of electrons per atom while minimizing changes to the decomposition coefficients. @@ -144,7 +181,6 @@ def correct_N_atomic(mol, N, c0, metric='u'): Returns: numpy ndarray: Corrected decomposition coefficients (1D array). """ - Q = moments.r2_c(mol, None, moments=[0], per_atom=True)[0] N0 = c0 @ Q O1q = _get_inv_metric(mol, metric, Q) @@ -154,19 +190,18 @@ def correct_N_atomic(mol, N, c0, metric='u'): def correct_N(mol, c0, N=None, mode='Lagrange', metric='u'): - """Corrects decomposition coefficients to match the target total electron count. + """Corrects decomposition coefficients to match the target total number of electrons. Args: mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. c0 (numpy ndarray): 1D array of initial decomposition coefficients. N (int, optional): Target number of electrons. If None, uses mol.nelectron. Defaults to None. - mode (str): Correction method ('scale' or 'lagrange'). Defaults to 'Lagrange'. + mode (str): Correction method ('scale' or 'Lagrange'). Defaults to 'Lagrange'. metric (str): Metric type for Lagrange correction ('u', 's', or 'j'). Defaults to 'u'. Returns: numpy ndarray: Corrected decomposition coefficients (1D array). """ - mode = mode.lower() q = moments.r2_c(mol, None, moments=[0]) N0 = c0 @ q diff --git a/qstack/fields/density2file.py b/qstack/fields/density2file.py index b695f9a2..e6a72938 100644 --- a/qstack/fields/density2file.py +++ b/qstack/fields/density2file.py @@ -1,9 +1,12 @@ +"""Density field output to file formats (cube, etc.).""" + import numpy as np from pyscf.dft.numint import eval_ao from pyscf.tools.cubegen import Cube import pyscf.tools.molden from . import moments + def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, margin=3.0): """Saves the electron density to a cube file. 
@@ -20,7 +23,6 @@ def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, m Returns: None: Creates a file named .cube on disk. """ - grid = Cube(mol, nx, ny, nz, resolution, margin) coords = grid.get_coords() ao = eval_ao(mol, coords) @@ -40,7 +42,6 @@ def coeffs_to_molden(mol, coeffs, moldenname): Returns: None: Creates a file named .molden on disk. """ - with open(moldenname, 'w') as f: N = moments.r2_c(mol, coeffs, moments=[0])[0] pyscf.tools.molden.header(mol, f, True) diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index f91756c4..a79575ef 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -1,3 +1,5 @@ +"""Density matrix manipulation and analysis functions.""" + from pyscf import dft from qstack import constants import numpy as np @@ -14,10 +16,9 @@ def get_converged_mf(mol, xc, dm0=None, verbose=False): Returns: tuple: A tuple containing: - - mf (pyscf.dft.rks.RKS or pyscf.dft.uks.UKS): Converged mean-field object. - - dm (numpy ndarray): Converged density matrix in AO-basis. + - mf (pyscf.dft.rks.RKS or pyscf.dft.uks.UKS): Converged mean-field object. + - dm (numpy ndarray): Converged density matrix in AO-basis. """ - if mol.multiplicity == 1: mf = dft.RKS(mol) else: @@ -48,7 +49,6 @@ def get_converged_dm(mol, xc, verbose=False): Returns: A numpy ndarray containing the density matrix in AO-basis. """ - return get_converged_mf(mol, xc, dm0=None, verbose=verbose)[1] @@ -62,12 +62,12 @@ def make_grid_for_rho(mol, grid_level=3): Returns: pyscf Grid object. """ - grid = dft.gen_grid.Grids(mol) grid.level = grid_level grid.build() return grid + def sphericalize_density_matrix(mol, dm): """Sphericalize the density matrix in the sense of an integral over all possible rotations. @@ -78,7 +78,6 @@ def sphericalize_density_matrix(mol, dm): Returns: A numpy ndarray with the sphericalized density matrix. 
""" - idx_by_l = [[] for i in range(constants.MAX_L)] i0 = 0 for ib in range(mol.nbas): diff --git a/qstack/fields/dori.py b/qstack/fields/dori.py index ca679321..a15d96bd 100644 --- a/qstack/fields/dori.py +++ b/qstack/fields/dori.py @@ -1,32 +1,33 @@ +"""Density Overlap Regions Indicator (DORI) computation.""" + import numpy as np +from tqdm import trange from pyscf.dft.numint import eval_ao, _dot_ao_dm, _contract_rho from pyscf.tools.cubegen import Cube, RESOLUTION, BOX_MARGIN from .dm import make_grid_for_rho -from tqdm import tqdm def eval_rho_dm(mol, ao, dm, deriv=2): - r'''Calculate electron density and its derivatives from a density matrix. + """Calculates electron density and its derivatives from a density matrix. Modified from pyscf/dft/numint.py to return full second derivative matrices needed for DORI calculations. Args: mol (pyscf.gto.Mole): pyscf Mole object. - ao (numpy ndarray): 3D array of shape (nderiv, ngrids, nao) where: - - ao[0]: atomic orbital values on the grid - - ao[1:4]: first derivatives (if deriv>=1) - - ao[4:10]: second derivatives in triangular form (if deriv==2) - dm (numpy ndarray): 2D array (nao, nao) - Hermitian density matrix in AO basis. + ao (numpy ndarray): 3D array of shape (*, ngrids, nao) where + - ao[0]: atomic orbital values on the grid, + - ao[1:4]: first derivatives (if deriv>=1), + - ao[4:10]: second derivatives in upper triangular form (if deriv==2). + dm (numpy ndarray): Density matrix in AO basis. deriv (int): Maximum derivative order to compute (0, 1, or 2). Defaults to 2. Returns: tuple: Depending on deriv value: - - deriv=0: rho (1D array of size ngrids) - - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) - - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) - ''' - + - deriv=0: rho as (ngrids,) numpy ndarray, + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) numpy ndarray, + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) numpy ndarray. 
+ """ AO, dAO_dr, d2AO_dr2 = np.split(ao, [1,4]) DM_AO = _dot_ao_dm(mol, AO[0], dm, None, None, None) rho = _contract_rho(AO[0], DM_AO) @@ -52,26 +53,22 @@ def eval_rho_dm(mol, ao, dm, deriv=2): def eval_rho_df(ao, c, deriv=2): - r'''Calculate electron density and its derivatives from density-fitting coefficients. - - Computes density and derivatives directly from fitted/decomposed density - representation using expansion coefficients. + """Calculates electron density and its derivatives from density-fitting coefficients. Args: - ao (numpy ndarray): 3D array of shape (nderiv, ngrids, nao) where: - - ao[0]: atomic orbital values on the grid - - ao[1:4]: first derivatives (if deriv>=1) - - ao[4:10]: second derivatives in triangular form (if deriv==2) + ao (numpy ndarray): 3D array of shape (*, ngrids, nao) where: + - ao[0]: atomic orbital values on the grid, + - ao[1:4]: first derivatives (if deriv>=1), + - ao[4:10]: second derivatives in upper triangular form (if deriv==2). c (numpy ndarray): 1D array of density fitting/expansion coefficients. deriv (int): Maximum derivative order to compute (0, 1, or 2). Defaults to 2. Returns: tuple: Depending on deriv value: - - deriv=0: rho (1D array of size ngrids) - - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) - - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) - ''' - + - deriv=0: rho as (ngrids,) numpy ndarray, + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) numpy ndarray, + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) numpy ndarray. + """ maxdim = 1 if deriv==0 else (4 if deriv==1 else 10) rho_all = np.tensordot(ao[:maxdim], c, 1) # corresponds to np.einsum('xip,p->xi', ao[:maxdim], c) if deriv==0: @@ -86,7 +83,7 @@ def eval_rho_df(ao, c, deriv=2): def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): - r'''Wrapper to calculate electron density and derivatives efficiently. + """Wrapper to calculate electron density and derivatives efficiently. 
Computes density and its spatial derivatives on a grid from either a density matrix or fitting coefficients, with optimizations for numerical stability. @@ -101,13 +98,13 @@ def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): Returns: tuple: Depending on deriv value: - - deriv=0: rho (1D array) - - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) - - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) + - deriv=0: rho as (ngrids,) numpy ndarray, + - deriv=1: (rho, drho_dr) where drho_dr is (3, ngrids) numpy ndarray, + - deriv=2: (rho, drho_dr, d2rho_dr2) where d2rho_dr2 is (3, 3, ngrids) numpy ndarray. Raises: RuntimeError: If both or neither of dm and c are provided. - ''' + """ if (c is None)==(dm is None): raise RuntimeError('Use either density matrix (dm) or density fitting coefficients (c)') if dm is not None: @@ -156,10 +153,7 @@ def compute_s2rho(rho, d2rho_dr2, eps=1e-4): def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): - r"""Computes Density Overlap Regions Indicator (DORI) analytically. - - DORI is a density-based descriptor for identifying covalent bonding regions, - with values close to 1 indicating strong electron sharing (covalent bonds). + """Computes Density Overlap Regions Indicator (DORI) analytically. Args: rho (numpy ndarray): 1D array (ngrids,) of electron density. @@ -169,18 +163,7 @@ def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): Returns: numpy ndarray: 1D array (ngrids,) of DORI values ranging from 0 to 1. - - Reference: - J. Chem. Theory Comput. 
2014, 10, 9, 3745–3756 (10.1021/ct500490b) - - Note: - DORI is defined as: - DORI(r) = γ(r) = θ(r) / (1 + θ(r)) - where: - θ = |∇(k²)|² / |k|⁶ - k(r) = ∇ρ(r) / ρ(r) """ - idx = np.where(abs(rho)>=eps)[0] k = drho_dr[...,idx] / rho[idx] k2 = np.einsum('xi,xi->i', k, k) @@ -198,11 +181,11 @@ def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): def compute_dori_num(mol, coords, dm=None, c=None, eps=1e-4, dx=1e-4): - r"""Computes DORI using numerical differentiation (semi-numerical approach). + """Computes DORI using numerical differentiation (semi-numerical approach). Alternative to analytical DORI calculation using finite differences for - derivatives of k². Useful for validation or when analytical gradients - are problematic. + derivatives of k², where k=∇ρ/ρ. Useful for validation or when analytical + gradients are problematic. Args: mol (pyscf.gto.Mole): pyscf Mole object. @@ -216,7 +199,6 @@ def compute_dori_num(mol, coords, dm=None, c=None, eps=1e-4, dx=1e-4): tuple: (dori, rho) where both are 1D arrays (ngrids,) containing DORI values and electron density respectively. """ - def grad_num(func, grid, d, **kwargs): g = np.zeros_like(grid) for j in range(3): @@ -259,23 +241,20 @@ def dori_on_grid(mol, coords, dm=None, c=None, eps=1e-4, alg='analytical', mem=1 c (numpy ndarray, optional): 1D density fitting coefficients. Conflicts with dm. eps (float): Density threshold for DORI calculation. Defaults to 1e-4. alg (str): Algorithm choice: 'analytical' or 'numerical'. Defaults to 'analytical'. - mem (float): Maximum memory in GiB for AO evaluation. Defaults to 1. + mem (float): Maximum memory in GiB for AO evaluation. Defaults to 1 GiB. dx (float): Step size in Bohr for numerical derivatives. Defaults to 1e-4. progress (bool): If True, displays progress bar. Defaults to False. 
Returns: tuple: (dori, rho, s2rho) where: - - dori: 1D array (ngrids,) of DORI values - - rho: 1D array (ngrids,) of electron density - - s2rho: 1D array (ngrids,) of signed density (None if numerical) + - dori (numpy ndarray): 1D array (ngrids,) of DORI values + - rho (numpy ndarray): 1D array (ngrids,) of electron density + - s2rho (numpy ndarray): 1D array (ngrids,) of signed density (None if numerical) """ - max_size = int(mem * 2**30) # mem * 1 GiB point_size = 10 * mol.nao * np.float64().itemsize # memory needed for 1 grid point dgrid = max_size // point_size - grid_chunks = range(0, len(coords), dgrid) - if progress: - grid_chunks = tqdm(grid_chunks) + grid_chunks = trange(0, len(coords), dgrid, disable=not progress) rho = np.zeros(len(coords)) @@ -308,10 +287,21 @@ def dori(mol, dm=None, c=None, dx=1e-4, mem=1, progress=False): """High-level interface to compute DORI with automatic grid generation and file output. - Computes the Density Overlap Regions Indicator (DORI) for analyzing chemical - bonding. Automatically generates appropriate grids and optionally saves results + Reference: + P. de Silva, C. Corminboeuf, + "Simultaneous visualization of covalent and noncovalent interactions using regions of density overlap", + J. Chem. Theory Comput. 10, 3745–3756 (2014), doi:10.1021/ct500490b. + + Computes the Density Overlap Regions Indicator (DORI). + Automatically generates appropriate grids and optionally saves results to cube files for visualization. + DORI is a density-based descriptor for identifying covalent bonding regions, + with values close to 1 indicating strong electron sharing (covalent bonds). + + DORI(r) = γ(r) = θ(r) / (1 + θ(r)), where: + θ = |∇(k²)|² / |k|⁶, and k(r) = ∇ρ(r) / ρ(r) + Args: mol (pyscf.gto.Mole): pyscf Mole object. dm (numpy ndarray, optional): 2D density matrix in AO basis. Conflicts with c. 
@@ -332,19 +322,18 @@ def dori(mol, dm=None, c=None, Returns: tuple: (dori, rho, s2rho, coords, weights) containing: - - dori (numpy ndarray): 1D array of DORI values - - rho (numpy ndarray): 1D array of electron density - - s2rho (numpy ndarray): 1D array of signed density (None if numerical) - - coords (numpy ndarray): 2D array (ngrids, 3) of grid coordinates - - weights (numpy ndarray): 1D array of grid weights + - dori (numpy ndarray): 1D array of DORI values + - rho (numpy ndarray): 1D array of electron density + - s2rho (numpy ndarray): 1D array of signed density (None if numerical) + - coords (numpy ndarray): 2D array (ngrids, 3) of grid coordinates + - weights (numpy ndarray): 1D array of grid weights Note: When cubename is provided with cube grid, creates three files: - .dori.cube: DORI values - - .rho.cube: electron density - - .sgnL2rho.cube: signed density (analytical only) + - .rho.cube: electron density + - .sgnL2rho.cube: signed density (analytical only) """ - if grid_type=='dft': grid = make_grid_for_rho(mol, grid_level=grid_level) weights = grid.weights diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index 67924332..79d43799 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -1,15 +1,18 @@ +"""Excited state density and property analysis.""" + import numpy as np from . import moments + def get_cis(mf, nstates): - """Performs CIS (Configuration Interaction Singles) calculation using TDA (Tamm-Dancoff Approximation). + """Wrapper for CIS (Configuration interaction singles) / TDA (Tamm-Dancoff approximation) computation. Args: - mf: Mean-field object (RHF, UHF, RKS, or UKS). + mf: Pyscf mean-field object. nstates (int): Number of excited states to compute. Returns: - TDA object: Converged TDA calculation object with excited state information. + TDA object: Converged TDA/CIS computation object with excited state information. 
""" td = mf.TDA() td.nstates = nstates @@ -18,29 +21,30 @@ def get_cis(mf, nstates): td.analyze() return td + def get_cis_tdm(td): """Extracts transition density matrices from TDA/CIS calculation. Args: - td: TDA or CIS calculation object containing excitation amplitudes. + td: TDA/CIS calculation object containing excitation amplitudes. Returns: numpy ndarray: Array of transition density matrices for all computed states. """ - return np.sqrt(2.0) * np.array([ xy[0] for xy in td.xy ]) + return np.sqrt(2.0) * np.array([xy[0] for xy in td.xy]) + def get_holepart(mol, x, coeff): - """Computes the hole and particle density matrices (atomic orbital basis) of selected states. + """Computes the hole and particle density matrices (in AO basis) for a selected state. Args: mol (pyscf Mole): pyscf Mole object. - x (numpy ndarray): Response vector (nstates×occ×virt) normalized to 1. + x (numpy ndarray): Response vector (occ×virt) normalized to 1. coeff (numpy ndarray): Ground-state molecular orbital vectors. Returns: Two numpy ndarrays containing the hole density matrices and the particle density matrices respectively. """ - def mo2ao(mat, coeff): return np.dot(coeff, np.dot(mat, coeff.T)) occ = mol.nelectron//2 @@ -50,45 +54,48 @@ def mo2ao(mat, coeff): part_ao = mo2ao(part_mo, coeff[:,occ:]) return hole_ao, part_ao + def get_transition_dm(mol, x_mo, coeff): - """ Compute the Transition Density Matrix. + """Computes the transition density matrix for a selected state. Args: mol (pyscf Mole): pyscf Mole object. - x_mo (numpy ndarray): Response vector (nstates×occ×virt) normalized to 1. + x_mo (numpy ndarray): Response vector (occ×virt) normalized to 1. coeff (numpy ndarray): Ground-state molecular orbital vectors. Returns: - A numpy ndarray containing the Transition Density Matrix. + numpy ndarray: transition density matrix. 
""" - occ = mol.nelectron//2 x_ao = coeff[:,:occ] @ x_mo @ coeff[:,occ:].T return x_ao def exciton_properties_c(mol, hole, part): - """ Computes the decomposed/predicted hole-particle distance, the hole size and the particle size. + """Computes the decomposed/predicted hole-particle distance, the hole size, and the particle size. Args: mol (pyscf Mole): pyscf Mole object. - hole (numpy ndarray): Hole density matrix. - part (numpy ndarray): Particle density matrix. + hole (numpy ndarray): Hole AO density decomposition coefficiants. + part (numpy ndarray): Particle density decomposition coefficiants. Returns: - Three floats: the hole-particle distance, the hole size, and the particle size respectively. + Tuple of floats: + - hole-particle distance, + - hole size, + - particle size. """ - _hole_N, hole_r, hole_r2 = moments.r2_c(mol, hole) _part_N, part_r, part_r2 = moments.r2_c(mol, part) dist = np.linalg.norm(hole_r-part_r) hole_extent = np.sqrt(hole_r2-hole_r@hole_r) part_extent = np.sqrt(part_r2-part_r@part_r) - return(dist, hole_extent, part_extent) + return dist, hole_extent, part_extent + def exciton_properties_dm(mol, hole, part): - """Computes the ab initio hole-particle distance, the hole size and the particle size. + """Computes the ab initio hole-particle distance, the hole size, and the particle size. Args: mol (pyscf Mole): pyscf Mole object. @@ -96,9 +103,11 @@ def exciton_properties_dm(mol, hole, part): part (numpy ndarray): Particle density matrix. Returns: - Three floats: the hole-particle distance, the hole size, and the particle size respectively. + Tuple of floats: + - hole-particle distance, + - hole size, + - particle size. 
""" - with mol.with_common_orig((0,0,0)): ao_r = mol.intor_symmetric('int1e_r', comp=3) ao_r2 = mol.intor_symmetric('int1e_r2') @@ -113,18 +122,26 @@ def exciton_properties_dm(mol, hole, part): part_extent = np.sqrt(part_r2-part_r@part_r) return(dist, hole_extent, part_extent) + def exciton_properties(mol, hole, part): - """Computes the ab initio or decomposed/predicted hole-particle distance, the hole size and the particle size according to the number of dimensions of the density matrices. + """Computes the ab initio or decomposed/predicted hole-particle distance, the hole size, and the particle size. + + Distance is defined as |_hole - _part|, and size as sqrt( - ^2). Args: mol (pyscf Mole): pyscf Mole object. - hole (numpy ndarray): Hole density matrix. - part (numpy ndarray): Particle density matrix. + hole (numpy ndarray): Hole density matrix in AO basis (2D) or decomposition coefficients (1D). + part (numpy ndarray): Particle density matrix in AO basis (2D) or decomposition coefficients (1D). Returns: - The hole-particle distance, the hole size, and the particle size as floats. - """ + Tuple of floats: + - hole-particle distance, + - hole size, + - particle size. + Raises: + RuntimeError: If the dimensions of hole and part do not match or are not 1D or 2D. + """ if hole.ndim==1 and part.ndim==1: return exciton_properties_c(mol, hole, part) elif hole.ndim==2 and part.ndim==2: diff --git a/qstack/fields/hf_otpd.py b/qstack/fields/hf_otpd.py index 88b13ffd..e8a7e292 100644 --- a/qstack/fields/hf_otpd.py +++ b/qstack/fields/hf_otpd.py @@ -1,3 +1,5 @@ +"""Hartree-Fock on-top pair density.""" + from pyscf import dft import numpy as np from .dm import make_grid_for_rho @@ -18,9 +20,8 @@ def hf_otpd(mol, dm, grid_level=3, save_otpd=False, return_all=False): Returns: numpy ndarray or tuple: If return_all is False, returns 1D array of OTPD values. - If return_all is True, returns tuple of (otpd, grid) where grid is the pyscf Grid object. 
+ If return_all is True, returns tuple of (otpd, grid) where grid is the pyscf Grid object. """ - grid = make_grid_for_rho(mol, grid_level) ao = dft.numint.eval_ao(mol, grid.coords) @@ -50,6 +51,5 @@ def save_OTPD(mol, otpd, grid): Returns: None: Creates a file named _otpd_data.npz on disk. """ - output = ''.join(mol.elements)+"_otpd_data" np.savez(output, atom=mol.atom, rho=otpd, coords=grid.coords, weights=grid.weights) diff --git a/qstack/fields/hirshfeld.py b/qstack/fields/hirshfeld.py index 23c28081..ffaa64ec 100644 --- a/qstack/fields/hirshfeld.py +++ b/qstack/fields/hirshfeld.py @@ -1,3 +1,5 @@ +"""Hirshfeld partitioning and atomic charge analysis.""" + import numpy as np import pyscf from .dm import make_grid_for_rho @@ -16,7 +18,6 @@ def spherical_atoms(elements, atm_bas): Returns: dict: Dictionary mapping element symbols (str) to atomic density matrices (numpy 2D ndarrays). """ - dm_atoms = {} for q in elements: mol_atm = pyscf.gto.M(atom=[[q, [0,0,0]]], spin=pyscf.data.elements.ELEMENTS_PROTON[q]%2, basis=atm_bas) @@ -32,7 +33,7 @@ def _hirshfeld_weights(mol, grid_coord, atm_dm, atm_bas, dominant): each grid point exclusively to the atom with the highest weight. Args: - mol (pyscf Mole): molecular pyscf Mole object. + mol (pyscf Mole): pyscf Mole object. grid_coord (numpy ndarray): 2D array (ngrids, 3) of grid point coordinates in Bohr. atm_dm (dict): Dictionary mapping element symbols to atomic density matrices from `spherical_atoms`. atm_bas (str or dict): Basis set name or dictionary used for atomic density matrices. @@ -41,7 +42,6 @@ def _hirshfeld_weights(mol, grid_coord, atm_dm, atm_bas, dominant): Returns: numpy ndarray: 2D array (natm, ngrids) of partitioning weights for each atom at each grid point. 
""" - # promolecular density grid_n = len(grid_coord) rho_atm = np.zeros((mol.natm, grid_n), dtype=float) @@ -76,11 +76,11 @@ def hirshfeld_charges(mol, cd, dm_atoms=None, atm_bas=None, or density matrices, and supports both standard and dominant partitioning. Args: - mol (pyscf Mole): pyscf Mole object for the molecule. + mol (pyscf Mole): pyscf Mole object. cd (numpy ndarray or list): Density representation as: - - 1D array: density-fitting coefficients - - 2D array: density matrix in AO basis - - list: multiple densities (returns list of results) + - 1D array: Density-fitting coefficients + - 2D array: Density matrix in AO basis + - list: Multiple densities (returns list of results). dm_atoms (dict, optional): Pre-computed atomic density matrices from `spherical_atoms`. If None, computed automatically. Defaults to None. atm_bas (str or dict, optional): Basis set for atomic density matrices. @@ -93,10 +93,9 @@ def hirshfeld_charges(mol, cd, dm_atoms=None, atm_bas=None, Returns: numpy ndarray or list: Atomic charges or occupations. - - Single 1D array if cd is a single density - - List of 1D arrays if cd is a list of densities + - Single 1D array if cd is a single density. + - List of 1D arrays if cd is a list of densities. """ - def atom_contributions(cd, ao, tot_weights): if cd.ndim==1: tmp = np.einsum('i,xi->x', cd, ao) diff --git a/qstack/fields/moments.py b/qstack/fields/moments.py index 77bb739c..a5e6bfc3 100644 --- a/qstack/fields/moments.py +++ b/qstack/fields/moments.py @@ -1,3 +1,5 @@ +"""Multipole moment computation functions.""" + import numpy as np from qstack.compound import basis_flatten from qstack.mathutils.array import safe_divide, scatter @@ -61,30 +63,27 @@ def r2_c(mol, rho, moments=(0,1,2), per_atom=False): Returns: tuple: If rho!=None, values representing the requested moments, possibly containing: - - float: Zeroth moment (integrated density). - - numpy ndarray: First moment (3-component dipole vector). 
- - float: Second moment (mean square radius). - If rho is None, arrays representing the requested moments in AO basis so that + - float: Zeroth moment (integrated density). + - numpy ndarray: First moment (3-component dipole vector). + - float: Second moment (mean square radius). + If rho is None, arrays representing the requested moments in AO basis so that they can be contracted with the coefficients usin (returned array)@(rho). - if rho is None and per_atom is True: - 0st moment: (mol.nao, mol.natm) - 1st moment: (3, mol.nao, mol.natm) - 2nd moment: (mol.nao, mol.natm) - - if rho is None and per_atom is False: - 0st moment: (mol.nao,) - 1st moment: (3, mol.nao) - 2nd moment: (mol.nao,) - - if rho is not None and per_atom is True: - 0st moment: (mol.natm,) - 1st moment: (3, mol.natm) - 2nd moment: (mol.natm,) + if rho is None and per_atom is True: + 0st moment: (mol.nao, mol.natm) + 1st moment: (3, mol.nao, mol.natm) + 2nd moment: (mol.nao, mol.natm) + if rho is None and per_atom is False: + 0st moment: (mol.nao,) + 1st moment: (3, mol.nao) + 2nd moment: (mol.nao,) + if rho is not None and per_atom is True: + 0st moment: (mol.natm,) + 1st moment: (3, mol.natm) + 2nd moment: (mol.natm,) """ - if max(moments)>2: raise RuntimeError('Only moments 0, 1, and 2 are supported.') ret = {} diff --git a/qstack/mathutils/__init__.py b/qstack/mathutils/__init__.py index dcd5d900..b38ee9ff 100644 --- a/qstack/mathutils/__init__.py +++ b/qstack/mathutils/__init__.py @@ -1,3 +1,3 @@ +"""Mathematical utilities module.""" + from . import fps, matrix -# from . import xyz_integrals_float # hidden by default to make the next part more discoverable -# from . 
import wigner, xyz_integrals_sym # needs sympy diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py index b593e261..4d760255 100644 --- a/qstack/mathutils/array.py +++ b/qstack/mathutils/array.py @@ -1,3 +1,5 @@ +"""Array manipulation utility functions.""" + import numpy as np @@ -34,12 +36,13 @@ def safe_divide(a, b): Returns: numpy.ndarray: Result of element-wise division of a by b, with zeros where b is zero. """ - return np.divide(a, b, out=np.zeros_like(b), where=b!=0) -def vstack_padding(xs): - """Vertically stack arrays with different shapes by padding smaller arrays with zeros. +def stack_padding(xs): + """Stack arrays with different shapes along a new axis by padding smaller arrays with zeros. + + Analogous to numpy.stack(axis=0). Args: xs (list): List of numpy arrays to be stacked. @@ -50,11 +53,45 @@ def vstack_padding(xs): Raises: ValueError: If input arrays have different number of dimensions. """ + xs = [np.asarray(x) for x in xs] if len({x.ndim for x in xs}) > 1: raise ValueError("All input arrays must have the same number of dimensions.") - max_size = max(x.shape for x in xs) + shapes = [x.shape for x in xs] + max_size = max(shapes) + if max_size == min(shapes): + return np.stack(xs, axis=0) X = np.zeros((len(xs), *max_size)) for i, x in enumerate(xs): slices = tuple(np.s_[0:s] for s in x.shape) X[i][slices] = x return X + + +def vstack_padding(xs): + """Vertically stack arrays with different shapes by padding smaller arrays with zeros. + + 1D input arrays of shape (N,) are reshaped to (1,N). + Analogous to numpy.vstack. + + Args: + xs (list): List of numpy arrays to be stacked. + + Returns: + numpy.ndarray : A stacked array with shape (sum(x.shape[0], *max_shape[1:]). + + Raises: + ValueError: If input arrays have different number of dimensions. 
+ """ + xs = [np.atleast_2d(np.asarray(x)) for x in xs] + if len({x.ndim for x in xs}) > 1: + raise ValueError("All input arrays must have the same number of dimensions.") + shapes_common_axis, shapes_other_axes = np.split(np.array([x.shape for x in xs]), (1,), axis=1) + if len(np.unique(shapes_other_axes, axis=0))>1: + return np.vstack(xs) + X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0))) + idx = 0 + for x in xs: + slices = (np.s_[idx:idx+x.shape[0]], *(np.s_[0:s] for s in x.shape[1:])) + X[slices] = x + idx += x.shape[0] + return X diff --git a/qstack/mathutils/fps.py b/qstack/mathutils/fps.py index 9655f5fa..7c9a16b0 100644 --- a/qstack/mathutils/fps.py +++ b/qstack/mathutils/fps.py @@ -1,3 +1,5 @@ +"""Farthest Point Sampling algorithm implementation.""" + import numpy as np @@ -25,8 +27,8 @@ def do_fps(x, d=0): Returns: tuple: A tuple containing: - - iy (numpy.ndarray): Indices of sampled points. - - measure (numpy.ndarray): Distances to nearest selected point for each iteration. + - iy (numpy.ndarray): Indices of sampled points. + - measure (numpy.ndarray): Distances to nearest selected point for each iteration. """ n = len(x) if d==0: diff --git a/qstack/mathutils/matrix.py b/qstack/mathutils/matrix.py index b3b615d4..5bf60f0d 100644 --- a/qstack/mathutils/matrix.py +++ b/qstack/mathutils/matrix.py @@ -1,3 +1,5 @@ +"""Matrix operation utility functions.""" + import numpy as np diff --git a/qstack/mathutils/rotation_matrix.py b/qstack/mathutils/rotation_matrix.py index 54b504b0..d76c67f8 100644 --- a/qstack/mathutils/rotation_matrix.py +++ b/qstack/mathutils/rotation_matrix.py @@ -1,3 +1,5 @@ +"""Rotation matrix generation functions.""" + import numpy as np @@ -64,7 +66,6 @@ def rotate_euler(a, b, g, rad=False): Returns: numpy.ndarray: 3x3 rotation matrix. 
""" - if not rad: a, b, g = np.radians([a, b, g]) diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index 171dbeab..d3a4cc16 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +"""Wigner D-matrices and spherical harmonic transformations.""" + import sys import sympy as sp from .xyz_integrals_sym import xyz as xyzint @@ -97,8 +99,8 @@ def product_Y(Y1,Y2): Returns: tuple: A tuple containing: - - coefficients (sympy.Matrix): Coefficients of the product. - - monomials (list): List of monomial powers. + - coefficients (sympy.Matrix): Coefficients of the product. + - monomials (list): List of monomial powers. """ prod = Y1 * Y2 prod = prod.expand().cancel() diff --git a/qstack/mathutils/xyz_integrals_float.py b/qstack/mathutils/xyz_integrals_float.py index a57a2e22..4d0543b7 100755 --- a/qstack/mathutils/xyz_integrals_float.py +++ b/qstack/mathutils/xyz_integrals_float.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 +"""Cartesian integrals for spherical harmonics (float version).""" + import sys + def xyz(n, m, k): """Computes the integral of x^2k y^2n z^2m over a unit sphere. @@ -16,7 +19,6 @@ def xyz(n, m, k): Returns: float: The value of the integral. """ - k,n,m = sorted([k,n,m], reverse=True) if n==0: # both n and m are 0 xyz = 2.0 * (1.0 - (2.0*k-1.0)/(2.0*k+1.0)) @@ -36,7 +38,6 @@ def I23(n,m,k): Returns: float: The value of the integral. """ - I23 = 0.0 for l in range(n+m+2): I23 = I23 + (-1)**l * trinomial( n+m+1, n+m+1-l, l) / (2.0*l+2.0*k-1.0) @@ -57,7 +58,6 @@ def trinomial(k1, k2, k3): Returns: float: The value of the trinomial coefficient. 
""" - k1,k2,k3 = sorted([k1,k2,k3], reverse=True) trinom = 1.0 for k in range(1,k2+1): diff --git a/qstack/mathutils/xyz_integrals_sym.py b/qstack/mathutils/xyz_integrals_sym.py index 527c17b1..8c37ad0d 100755 --- a/qstack/mathutils/xyz_integrals_sym.py +++ b/qstack/mathutils/xyz_integrals_sym.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 +"""Cartesian integrals for spherical harmonics (symbolic version).""" + import sys try: import sympy except ImportError: print(""" - ERROR: cannot import sympy. Have you installed qstack with the \"wigner\" option?\n\n (for instance, with `pip install qstack[wigner]` or `pip install qstack[all]`) @@ -27,7 +28,6 @@ def xyz(n, m, k): Returns: sympy.Expr: The value of the integral. """ - k,n,m = sorted([k,n,m], reverse=True) if n==0: # both n and m are 0 K = sympy.symbols('K') @@ -48,7 +48,6 @@ def I23(n,m,k): Returns: sympy.Expr: The value of the integral. """ - I23 = 0.0 K = sympy.symbols('K') for l in range(n+m+2): @@ -71,7 +70,6 @@ def trinomial(k1, k2, k3): Returns: sympy.Expr: The value of the trinomial coefficient. """ - k1,k2,k3 = sorted([k1,k2,k3]) trinom = sympy.FallingFactorial(k1+k2+k3, k3) / (sympy.factorial(k1)*sympy.factorial(k2)) return trinom diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 2ea2de9b..21709c5d 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -1,3 +1,7 @@ +"""ORCA quantum chemistry package I/O utilities. + +Read and parse ORCA output files, including orbitals and densities binary files.""" + import warnings import struct import numpy as np @@ -20,7 +24,6 @@ def read_input(fname, basis, ecp=None): Returns: pyscf.gto.Mole: pyscf Mole object. """ - with open(fname) as f: lines = [x.strip() for x in f] @@ -65,7 +68,6 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re Raises: RuntimeError: If density matrix reordering is compromised for def2 basis with 3d elements. 
""" - path = directory+'/'+basename if version < 500: if version==400: @@ -115,15 +117,16 @@ def _parse_gbw(fname): Returns: tuple: A tuple containing: - - coefficients_ab (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. - - energies_ab (numpy.ndarray): 2D array of shape (s,nao) with MO energies. - - occupations_ab (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. - - ls (dict): Dictionary mapping atom index to list of basis function angular momenta. + - coefficients_ab (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. + - energies_ab (numpy.ndarray): 2D array of shape (s,nao) with MO energies. + - occupations_ab (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. + - ls (dict): Dictionary mapping atom index to list of basis function angular momenta. + s=1 for closed-shell and 2 for open-shell computation, + nao is the number of atomic/molecular orbitals. Raises: RuntimeError: If number of MO sets is not 1 or 2. """ - def read_array(f, n, dtype): return np.frombuffer(f.read(dtype().itemsize * n), dtype=dtype) @@ -252,10 +255,10 @@ def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): Returns: tuple: A tuple containing: - - c (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. - - e (numpy.ndarray): 2D array of shape (s,nao) with MO energies. - - occ (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. - Where s is 1 for closed-shell and 2 for open-shell computation, + - c (numpy.ndarray): 3D array of shape (s,nao,nao) with MO coefficients. + - e (numpy.ndarray): 2D array of shape (s,nao) with MO energies. + - occ (numpy.ndarray): 2D array of shape (s,nao) with MO occupation numbers. + Where s is 1 for closed-shell and 2 for open-shell computation, and nao is the number of atomic/molecular orbitals. 
Raises: diff --git a/qstack/qml/__init__.py b/qstack/qml/__init__.py index 6242d154..27ad73c6 100644 --- a/qstack/qml/__init__.py +++ b/qstack/qml/__init__.py @@ -1,3 +1,5 @@ +"""Quantum Machine Learning representations module.""" + from . import b2r2 from . import slatm diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 0fa954f0..aa25e017 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -1,7 +1,14 @@ +"""Bond-based reaction representation (B2R2) for chemical reactions. + +Provides: + - defaults: default parameters for B2R2 computation. +""" + import itertools from types import SimpleNamespace import numpy as np from scipy.special import erf +from tqdm import tqdm defaults = SimpleNamespace(rcut=3.5, gridspace=0.03) @@ -18,14 +25,15 @@ def get_bags(unique_ncharges): """ combs = list(itertools.combinations(unique_ncharges, r=2)) combs = [list(x) for x in combs] - # add self interaction self_combs = [[x, x] for x in unique_ncharges] combs += self_combs return combs def get_mu_sigma(R): - """Computes Gaussian distribution parameters from interatomic distance. + """Get Gaussian distribution parameters from interatomic distance. + + The constants used here are taken from the original B2R2 implementation. Args: R (float): Interatomic distance. @@ -55,7 +63,7 @@ def get_gaussian(x, R): def get_skew_gaussian_l_both(x, R, Z_I, Z_J): - """Computes skewed Gaussian distributions for local B2R2 representation. + """Computes skewed Gaussian distributions for B2R2_l representation. Args: x (numpy ndarray): Grid points to evaluate the functions. @@ -81,7 +89,7 @@ def get_skew_gaussian_l_both(x, R, Z_I, Z_J): def get_skew_gaussian_n_both(x, R, Z_I, Z_J): - """Computes combined skewed Gaussian distribution for nuclear B2R2 representation. + """Computes combined skewed Gaussian distribution for B2R2_n representation. Args: x (numpy ndarray): Grid points to evaluate the function. 
@@ -108,7 +116,18 @@ def get_skew_gaussian_n_both(x, R, Z_I, Z_J): def get_b2r2_n_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): + """Computes B2R2_n representation for a single molecule. + + Args: + ncharges (array-like): Atomic numbers for all atoms in the molecule. + coords (array-like): Atomic coordinates in Å, shape (natom, 3). + elements (array-like): Unique atomic numbers present in the dataset. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + Returns: + numpy.ndarray: B2R2_n representation (ngrid,). + """ idx_relevant_atoms = np.where(np.sum(np.array(ncharges)==np.array(elements)[:,None], axis=0)) ncharges = np.array(ncharges)[idx_relevant_atoms] coords = np.array(coords)[idx_relevant_atoms] @@ -129,7 +148,18 @@ def get_b2r2_n_molecular(ncharges, coords, elements, def get_b2r2_a_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): + """Computes B2R2_a representation for a single molecule. + + Args: + ncharges (array-like): Atomic numbers for all atoms in the molecule. + coords (array-like): Atomic coordinates in Å, shape (natom, 3). + elements (array-like): Unique atomic numbers present in the dataset. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + Returns: + numpy.ndarray: B2R2_a representation (n_pairs*ngrid,). + """ idx_relevant_atoms = np.where(np.sum(np.array(ncharges)==np.array(elements)[:,None], axis=0)) ncharges = np.array(ncharges)[idx_relevant_atoms] coords = np.array(coords)[idx_relevant_atoms] @@ -155,7 +185,18 @@ def get_b2r2_a_molecular(ncharges, coords, elements, def get_b2r2_l_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): + """Computes B2R2_l representation for a single molecule. 
+ + Args: + ncharges (array-like): Atomic numbers for all atoms in the molecule. + coords (array-like): Atomic coordinates in Å, shape (natom, 3). + elements (array-like): Unique atomic numbers present in the dataset. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + Returns: + numpy.ndarray: B2R2_l representation (n_elements*ngrid,). + """ idx_relevant_atoms = np.where(np.sum(np.array(ncharges)==np.array(elements)[:,None], axis=0)) ncharges = np.array(ncharges)[idx_relevant_atoms] coords = np.array(coords)[idx_relevant_atoms] @@ -181,6 +222,35 @@ def get_b2r2_l_molecular(ncharges, coords, elements, def get_b2r2(reactions, variant='l', progress=False, rcut=defaults.rcut, gridspace=defaults.gridspace): + """High-level interface for computing bond-based reaction representations (B2R2). + + Reference: + P. van Gerwen, A. Fabrizio, M. D. Wodrich, C. Corminboeuf, + "Physics-based representations for machine learning properties of chemical reactions", + Mach. Learn.: Sci. Technol. 3, 045005 (2022), doi:10.1088/2632-2153/ac8f1a. + + Args: + reactions (List[rxn]): List of reaction objects with attributes: + - rxn.reactants (List[Mol]): List of reactant molecules. + - rxn.products (List[Mol]): List of product molecules. + Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + variant (str): B2R2 variant to compute. Options: + - 'l': Local variant with element-resolved skewed Gaussians (default). + - 'a': Agnostic variant with element-pair Gaussians. + - 'n': Nuclear variant with combined skewed Gaussians. + progress (bool): If True, displays progress bar. Defaults to False. + rcut (float): Cutoff radius for bond detection in Å. Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + + Returns: + numpy.ndarray: B2R2 representations of shape (n_reactions, n_features). 
+ For variants 'l' and 'a', returns difference (products - reactants). + For variant 'n', returns concatenation [reactants, products]. + + Raises: + RuntimeError: If an unknown variant is specified. + """ if variant=='l': get_b2r2_molecular=get_b2r2_l_molecular combine = lambda r,p: p-r @@ -200,37 +270,35 @@ def get_b2r2_inner(reactions, progress=False, rcut=defaults.rcut, gridspace=defaults.gridspace, get_b2r2_molecular=None, combine=None): + """Computes the B2R2 representations for a list of reactions. - """ Computes the B2R2 representations for a list of reactions. - - Reference: - P. van Gerwen, A. Fabrizio, M. D. Wodrich, C. Corminboeuf, - "Physics-based representations for machine learning properties of chemical reactions", - Mach. Learn.: Sci. Technol. 3, 045005 (2022), doi:10.1088/2632-2153/ac8f1a. + Internal implementation function that computes B2R2 representations using + provided molecular representation function and combination strategy. + Automatically determines element set from all reactant molecules. Args: - reactions (List[rxn]): a list of rxn objects containing reaction information. - rxn.reactants (List[ase.Atoms]) is a list of reactants (ASE molecules), - rxn.products (List[ase.Atoms]) is a list of products. - rcut (float): cutoff radius (Å) - gridspace (float): grid spacing (Å) - get_b2r2_molecular (func): function to compute the molecular representations, - i.e. one of `get_b2r2_{l,n,a}_molecular` - combine (func(r: ndarray, p: ndarray)): function to combine the reactants and products representations, - e.g. difference or concatenation + reactions (List[rxn]): List of reaction objects with attributes: + - rxn.reactants (List[Mol]): List of reactant molecules. + - rxn.products (List[Mol]): List of product molecules. + Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + progress (bool): If True, displays progress bar. Defaults to False. + rcut (float): Cutoff radius for bond detection in Å. 
Defaults to 3.5. + gridspace (float): Grid spacing for discretization in Å. Defaults to 0.03. + get_b2r2_molecular (callable): Function to compute molecular representations. + Should be one of get_b2r2_{l,n,a}_molecular. + combine (callable): Function(r: ndarray, p: ndarray) -> ndarray to combine + reactant and product representations (e.g., difference or concatenation). + Returns: - ndrarray containing the B2R2 representation for each reaction + numpy.ndarray: B2R2 representations of shape (n_reactions, n_features), + where each row represents a reaction according to the combine function. """ - qs = [mol.numbers for rxn in reactions for mol in rxn.reactants] elements = np.unique(np.concatenate(qs)) - if progress: - import tqdm - reactions = tqdm.tqdm(reactions) - b2r2_diff = [] - for reaction in reactions: + for reaction in tqdm(reactions, disable=not progress): b2r2_reactants, b2r2_products = [ sum(get_b2r2_molecular(mol.numbers, mol.positions, rcut=rcut, diff --git a/qstack/qml/slatm.py b/qstack/qml/slatm.py index 9bef6b2e..d680a872 100644 --- a/qstack/qml/slatm.py +++ b/qstack/qml/slatm.py @@ -1,6 +1,14 @@ +"""Spectrum of London and Axilrod-Teller-Muto potential (SLATM) representation. + +Provides: + - defaults: Default parameters for SLATM representation. 
+""" + import itertools from types import SimpleNamespace import numpy as np +from tqdm import tqdm +from qstack.mathutils.array import stack_padding, vstack_padding defaults = SimpleNamespace(sigma2=0.05, r0=0.1, rcut=4.8, dgrid2=0.03, theta0=20.0*np.pi/180.0, sigma3=0.05, dgrid3=0.03) @@ -15,11 +23,10 @@ def get_mbtypes(qs, qml=False): Returns: dict: Dictionary with keys 1, 2, 3 containing: - - 1: Array of unique elements - - 2: List of element pairs (including self-pairs) - - 3: List of valid element triples + - 1: Array of unique elements + - 2: List of element pairs (including self-pairs) + - 3: List of valid element triples """ - # all the elements elements = itertools.chain.from_iterable(list(i) for i in qs) if qml: @@ -44,26 +51,27 @@ def get_mbtypes(qs, qml=False): return {1: elements, 2: pairs, 3: triples} -def pad_zeros(slatm): - """Pads SLATM representations with zeros to have uniform size. +def get_two_body(i, mbtype, q, dist, + r0=defaults.r0, rcut=defaults.rcut, + sigma=defaults.sigma2, dgrid=defaults.dgrid2): + """Computes two-body London dispersion contribution for atom i. + + Evaluates the two-body term from pairwise 1/r^6 London dispersion interactions, + projected onto a radial grid with Gaussian broadening of interatomic distances. Args: - slatm (list): List of SLATM representation arrays with potentially different sizes. + i (int): Index of the central atom. + mbtype (tuple): Element pair (q1, q2) defining the two-body interaction type. + q (numpy.ndarray): Array of atomic numbers for all atoms in molecule. + dist (numpy.ndarray): Pairwise distance matrix (natom,natom) in Å. + r0 (float): Minimum radial distance for grid. Defaults to 0.1 Å. + rcut (float): Radial cutoff distance. Defaults to 4.8 Å. + sigma (float): Gaussian width for distance broadening. Defaults to 0.05 Å. + dgrid (float): Grid spacing for radial discretization. Defaults to 0.03 Å. Returns: - list: List of zero-padded SLATM arrays with uniform size. 
+ numpy.ndarray: Two-body contribution on radial grid (ngrid,). """ - n_features = np.array([x.shape[-1] for x in slatm]) - pad_sizes = max(n_features)-n_features - for i in range(len(slatm)): - if pad_sizes[i]: - slatm[i] = np.pad(slatm[i], (0, pad_sizes[i])) - return slatm - - -def get_two_body(i, mbtype, q, dist, - r0=defaults.r0, rcut=defaults.rcut, - sigma=defaults.sigma2, dgrid=defaults.dgrid2): ngrid = int((rcut - r0)/dgrid) + 1 rgrid = np.linspace(r0, rcut, ngrid) @@ -93,12 +101,40 @@ def get_two_body(i, mbtype, q, dist, return 0.5 * dgrid * london * deltas - def get_three_body(j, mbtype, q, r, dist, rcut=defaults.rcut, theta0=defaults.theta0, sigma=defaults.sigma3, dgrid=defaults.dgrid3): + """Computes three-body Axilrod-Teller-Muto contribution for atom j. + + Evaluates the three-body ATM term from triple-dipole interactions, + projected onto an angular grid with Gaussian broadening of bond angles. + + Args: + j (int): Index of the central atom in the triplet. + mbtype (tuple): Element triple (q1, q2, q3) defining the three-body interaction type. + q (numpy.ndarray): Array of atomic numbers for all atoms. + r (numpy.ndarray): Atomic position array (natom,3) in Å. + dist (numpy.ndarray): Pairwise distance matrix (natom,natom) in Å. + rcut (float): Distance cutoff for triplet formation. Defaults to 4.8 Å. + theta0 (float): Margin for angular grid in radians. Defaults to 20°. + sigma (float): Gaussian width for angle broadening in radians. Defaults to 0.05. + dgrid (float): Grid spacing for angular discretization in radians. Defaults to 0.03. + + Returns: + numpy.ndarray: Three-body contribution on angular grid (ngrid,). + """ def get_cos(a, b, c): + """Computes cosine of angle abc from atomic positions. + + Args: + a (int): Index of first atom. + b (int): Index of vertex atom. + c (int): Index of third atom. + + Returns: + float: Cosine of angle abc. 
+ """ v1 = r[a] - r[b] v2 = r[c] - r[b] return v1 @ v2 / (dist[a,b] * dist[b,c]) @@ -137,19 +173,53 @@ def get_cos(a, b, c): return spectrum * dgrid * q1 * q2 * q3 / 3.0 - def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, global_repr=False, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): + """Computes SLATM representation for a single molecule. + + Constructs the SLATM (Spectrum of London and Axilrod-Teller-Muto potential) + representation by combining one-body (nuclear charges), two-body (London dispersion), + and three-body (Axilrod-Teller-Muto) contributions. + Reference: + B. Huang, O. A. von Lilienfeld, + "Quantum machine learning using atom-in-molecule-based fragments selected on the fly", + Nat. Chem. 12, 945–951 (2020), doi:10.1038/s41557-020-0527-z. + + Args: + q (numpy.ndarray): Array of atomic numbers (natom,). + r (numpy.ndarray): Atomic position array (natom,3) in Å. + mbtypes (dict): Many-body types from get_mbtypes with keys 1, 2, 3. + qml_compatible (bool): If True, maintains QML package compatibility. + If False, uses condensed representation (less 0s). Defaults to True. + Is set to True if global_repr is True. + stack_all (bool): If True, stacks all representations into one array. + Defaults to True. + global_repr (bool): If True, returns molecular SLATM (sum over atoms). + If False, returns atomic SLATM. Defaults to False. + r0 (float): Minimum radial distance for 2-body grid. Defaults to 0.1 Å. + rcut (float): Radial cutoff for 2-body and 3-body terms. Defaults to 4.8 Å. + sigma2 (float): Gaussian width for 2-body term. Defaults to 0.05 Å. + dgrid2 (float): Grid spacing for 2-body term. Defaults to 0.03 Å. + theta0 (float): Minimum angle for 3-body grid in radians. Defaults to 20°. + sigma3 (float): Gaussian width for 3-body term in radians. Defaults to 0.05. + dgrid3 (float): Grid spacing for 3-body term in radians. 
Defaults to 0.03. + + Returns: + numpy.ndarray or dict: SLATM representation. + - If stack_all=True and global_repr=False, numpy ndarray of shape (natom,n_features). + - If global_repr=True, numpy ndarray of shape (n_features,). + - If stack_all=False, returns dict with keys 1, 2, 3 containing lists of numpy ndarrays. + """ # for global representation, qml_compatible should be True qml_compatible = qml_compatible or global_repr natoms = len(q) dist = np.zeros((natoms, natoms)) - for (i,j) in itertools.product(range(natoms), range(natoms)): - dist[i,j] = np.linalg.norm(r[i]-r[j]) + for (i, j) in itertools.combinations_with_replacement(range(natoms), 2): + dist[i,j] = dist[j,i] = np.linalg.norm(r[i]-r[j]) slatm = [] for i, qi in enumerate(q): @@ -187,9 +257,7 @@ def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, slatm.append({1: slatm1b, 2: slatm2b, 3: slatm3b}) if stack_all or global_repr: - if not qml_compatible: - slatm = pad_zeros(slatm) - slatm = np.vstack(slatm) + slatm = stack_padding(slatm) if global_repr: slatm = np.sum(slatm, axis=0) @@ -197,43 +265,44 @@ def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, return slatm - def get_slatm_for_dataset(molecules, progress=False, global_repr=False, qml_mbtypes=True, qml_compatible=True, stack_all=True, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """ Computes the (a)SLATM representation for a set of molecules. + """Computes the (a)SLATM representation for a set of molecules. - Reference: - B. Huang, O. A. von Lilienfeld, - "Quantum machine learning using atom-in-molecule-based fragments selected on the fly", - Nat. Chem. 12, 945–951 (2020), doi:10.1038/s41557-020-0527-z. + Generates SLATM descriptors for molecular datasets, automatically determining + many-body types from all molecules. 
Args: - molecules (Union(List[ase.Atoms], List[str]): pre-loaded ASE molecules or paths to the xyz files. - Alternatively, a list of any objects providing fields .numbers and .positions (Å) - global_repr (bool): return molecular SLATM if True, return atomic SLATM (aSLATM) if False - qml_mbtypes (bool): if True, mbtypes order should be identical as from QML (https://www.qmlcode.org/). - if False, the elements are sorted thus mbtype order can differ from QML in some cases - qml_compatible (bool): if False, the local representation (global_repr=False) is condensed - stack_all (bool): if stack the representations into one big ndarray - - rcut (float): radial cutoff (Å) for the 2- and 3-body terms - r0 (float): grid range parameter (Å) [r0, rcut] for the 2-body term - sigma2 (float): gaussian width for the 2-body term (Å) - dgrid2 (float): grid spacing for the 2-body term (Å) - theta0 (float): grid range parameter (°) [theta0, 180-theta0] for the 3-body term - sigma3 (float): gaussian width for the 3-body term (°) - dgrid3 (float): grid spacing for the 3-body term (°) - - progress (bool): if print progress bar + molecules (Union[List[Mol], List[str]]): Pre-loaded molecules or paths + to XYZ files. Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + progress (bool): If True, displays progress bar. Defaults to False. + global_repr (bool): If True, returns molecular SLATM (sum over atoms). + If False, returns atomic SLATM (aSLATM). Defaults to False. + qml_mbtypes (bool): If True, uses element ordering compatible with QML package + (https://www.qmlcode.org/). If False, uses sorted ordering. Defaults to True. + qml_compatible (bool): If False, uses condensed representation for local + (global_repr=False) mode. Defaults to True. + stack_all (bool): If True, stacks representations into one array. Defaults to True. + r0 (float): Minimum radial distance for 2-body grid in Å. Defaults to 0.1. 
+ rcut (float): Radial cutoff for 2-body and 3-body terms in Å. Defaults to 4.8. + sigma2 (float): Gaussian width for 2-body term in Å. Defaults to 0.05. + dgrid2 (float): Grid spacing for 2-body term in Å. Defaults to 0.03. + theta0 (float): Minimum angle for 3-body grid in radians. Defaults to 20° (π/9). + sigma3 (float): Gaussian width for 3-body term in radians. Defaults to 0.05. + dgrid3 (float): Grid spacing for 3-body term in radians. Defaults to 0.03. Returns: - ndrarray or List[List[ndarray]] containing the SLATM representation for each molecule. + numpy.ndarray or List[List[numpy.ndarray]]: SLATM representations for all molecules. + - If stack_all=True and global_repr=True, np.ndarray of shape (n_molecules, n_features), + - If stack_all=True and global_repr=False, np.ndarray of shape (n_atoms_total, n_features), + - If stack_all=False and global_repr=True, list of np.ndarrays of shape (n_features,) per molecule, + - If stack_all=False and global_repr=False, list of lists of dicts per molecule with keys (1,2,3). 
""" - if isinstance(molecules[0], str): import ase.io molecules = [ase.io.read(xyz) for xyz in molecules] @@ -241,30 +310,27 @@ def get_slatm_for_dataset(molecules, qs = [mol.numbers for mol in molecules] mbtypes = get_mbtypes(qs, qml=qml_mbtypes) - if progress: - import tqdm - molecules = tqdm.tqdm(molecules) - slatm = [get_slatm(mol.numbers, mol.positions, mbtypes, global_repr=global_repr, qml_compatible=qml_compatible, stack_all=stack_all, r0=r0, rcut=rcut, sigma2=sigma2, dgrid2=dgrid2, theta0=theta0, sigma3=sigma3, dgrid3=dgrid3) - for mol in molecules] + for mol in tqdm(molecules, disable=not progress)] if stack_all: - if not qml_compatible: - slatm = pad_zeros(slatm) - slatm = np.vstack(slatm) + slatm = vstack_padding(slatm) return slatm - def get_slatm_rxn(reactions, progress=False, qml_mbtypes=True, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """ Computes the SLATM_d representation for a list of reactions. + """Computes the SLATM_d representation for chemical reactions. + + Calculates reaction representations as the difference between product and reactant + SLATM descriptors (ΔR = R_products - R_reactants), suitable for predicting + reaction properties like barriers and energies. Reference: P. van Gerwen, A. Fabrizio, M. D. Wodrich, C. Corminboeuf, @@ -272,35 +338,31 @@ def get_slatm_rxn(reactions, progress=False, qml_mbtypes=True, Mach. Learn.: Sci. Technol. 3, 045005 (2022), doi:10.1088/2632-2153/ac8f1a. Args: - reactions (List[rxn]): a list of rxn objects containing reaction information. - rxn.reactants (List[ase.Atoms]) is a list of reactants (ASE molecules), - rxn.products (List[ase.Atoms]) is a list of products. - qml_mbtypes (bool): if True, mbtypes order should be identical as from QML (https://www.qmlcode.org/). 
- if False, the elements are sorted thus mbtype order can differ from QML in some cases - - rcut (float): radial cutoff (Å) for the 2- and 3-body terms - r0 (float): grid range parameter (Å) [r0, rcut] for the 2-body term - sigma2 (float): gaussian width for the 2-body term (Å) - dgrid2 (float): grid spacing for the 2-body term (Å) - theta0 (float): grid range parameter (°) [theta0, 180-theta0] for the 3-body term - sigma3 (float): gaussian width for the 3-body term (°) - dgrid3 (float): grid spacing for the 3-body term (°) - - progress (bool): if print progress bar + reactions (List[rxn]): List of reaction objects with attributes: + - rxn.reactants (List[Mol]): List of reactant molecules. + - rxn.products (List[Mol]): List of product molecules. + Mol can be any type with .numbers and .positions (Å) attributes, + for example ASE Atoms objects. + progress (bool): If True, displays progress bar. Defaults to False. + qml_mbtypes (bool): If True, uses element ordering compatible with QML package + (https://www.qmlcode.org/). If False, uses sorted ordering. Defaults to True. + r0 (float): Minimum radial distance for 2-body grid in Å. Defaults to 0.1. + rcut (float): Radial cutoff for 2-body and 3-body terms in Å. Defaults to 4.8. + sigma2 (float): Gaussian width for 2-body term in Å. Defaults to 0.05. + dgrid2 (float): Grid spacing for 2-body term in Å. Defaults to 0.03. + theta0 (float): Minimum angle for 3-body grid in radians. Defaults to 20° (π/9). + sigma3 (float): Gaussian width for 3-body term in radians. Defaults to 0.05. + dgrid3 (float): Grid spacing for 3-body term in radians. Defaults to 0.03. Returns: - ndrarray containing the SLATM_d representation for each reaction + numpy.ndarray: SLATM_d difference representations of shape (n_reactions, n_features), + where each row is the difference between product and reactant SLATM vectors. 
""" - qs = [mol.numbers for rxn in reactions for mol in rxn.reactants] mbtypes = get_mbtypes(qs, qml=qml_mbtypes) - if progress: - import tqdm - reactions = tqdm.tqdm(reactions) - slatm_diff = [] - for reaction in reactions: + for reaction in tqdm(reactions, disable=not progress): slatm_reactants, slatm_products = [ sum(get_slatm(mol.numbers, mol.positions, mbtypes, global_repr=True, stack_all=True, diff --git a/qstack/regression/__init__.py b/qstack/regression/__init__.py index 5efafc29..e717b304 100644 --- a/qstack/regression/__init__.py +++ b/qstack/regression/__init__.py @@ -1,13 +1,13 @@ +"""Kernel Ridge Regression (KRR) module.""" + try: import sklearn del sklearn except ImportError: print(""" - ERROR: cannot import scikit-learn. Have you installed qstack with the \"regression\" option?\n\n (for instance, with `pip install qstack[regression] or `pip install qstack[all]``) """) raise - from . import kernel_utils diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index 795750d4..e909afa4 100644 --- a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -1,3 +1,5 @@ +"""Kernel matrix condition number.""" + import numpy as np from qstack.mathutils.fps import do_fps from qstack.tools import correct_num_threads @@ -12,23 +14,22 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, """ Compute kernel matrix condition number Args: - X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples - read_kernel (bool): if 'X' is a kernel and not an array of representations - sigma (float): width of the kernel - eta (float): regularization strength for matrix inversion - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - random_state (int): the seed used for random number 
generator (controls train/test splitting) - idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) - idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sigma (float): Width of the kernel. + eta (float): Regularization strength for matrix inversion. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + sparse (int): The number of reference environnments to consider for sparse regression. Returns: - float : condition number + float: Condition number. """ - idx_train, _, _, _ = train_test_split_idx(y=np.arange(len(X)), idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) if read_kernel is False: diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index 22fbaf00..5481c918 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -1,3 +1,5 @@ +"""Cross-validation results.""" + import numpy as np from tqdm import tqdm from qstack.tools import correct_num_threads @@ -17,26 +19,26 @@ def cv_results(X, y, """ Computes various learning curves (LC) ,with random sampling, and returns the average performance. 
Args: - X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - sigmaarr (list): list of kernel width for the grid search - etaarr (list): list of regularization strength for the grid search - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - train_size (list): list of training set size fractions used to evaluate the points on the LC - splits (int): K number of splits for the Kfold cross-validation - printlevel (int): controls level of output printing - adaptative (bool): to expand the grid for optimization adaptatively - read_kernel (bool): if 'X' is a kernel and not an array of representations - n_rep (int): the number of repetition for each point (using random sampling) - save (bool): wheather to save intermediate LCs (.npy) - preffix (str): the prefix to use for filename when saving intemediate results - save_pred (bool): to save predicted targets for all LCs (.npy) - progress (bool): to print a progress bar - sparse (int): the number of reference environnments to consider for sparse regression - seed0 (int): the initial seed to produce a set of seeds used for random number generator + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + sigmaarr (list): List of kernel width for the grid search. + etaarr (list): List of regularization strength for the grid search. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). 
+ train_size (list): List of training set size fractions used to evaluate the points on the LC. + splits (int): K number of splits for the Kfold cross-validation. + printlevel (int): Controls level of output printing. + adaptative (bool): To expand the grid for optimization adaptatively. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + n_rep (int): The number of repetition for each point (using random sampling). + save (bool): Wheather to save intermediate LCs (.npy). + preffix (str): The prefix to use for filename when saving intemediate results. + save_pred (bool): To save predicted targets for all LCs (.npy). + progress (bool): To print a progress bar. + sparse (int): The number of reference environnments to consider for sparse regression. + seed0 (int): The initial seed to produce a set of seeds used for random number generator. Returns: The averaged LC data points as a numpy.ndarray containing (train sizes, MAE, std) diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 270f2548..14650026 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -1,3 +1,5 @@ +"""Final error computation on test sets.""" + import sys import numpy as np import scipy @@ -15,21 +17,21 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, """ Perform prediction on the test set using the full training set. 
Args: - X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - read_kernel (bool): if 'X' is a kernel and not an array of representations - sigma (float): width of the kernel - eta (float): regularization strength for matrix inversion - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) - idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression - return_pred (bool) : return predictions - return_alpha (bool) : return regression weights + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sigma (float): Width of the kernel. + eta (float): Regularization strength for matrix inversion. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). 
+ sparse (int): The number of reference environnments to consider for sparse regression. + return_pred (bool): Return predictions. + return_alpha (bool): Return regression weights. Returns: np.1darray(Ntest) : prediction absolute errors on the test set diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index f4d0fe78..f3cb78e7 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -1,3 +1,9 @@ +"""Global (molecular) kernel implementations. + +Provides: + global_kernels_dict: Dictionary mapping global kernel names to functions. +""" + import math from collections import Counter import numpy as np @@ -123,7 +129,6 @@ def mol_to_dict(mol, species): Returns: dict: Dictionary mapping atomic numbers to arrays of atomic feature vectors. """ - mol_dict = {q:[] for q in species} for atom in mol: mol_dict[atom[0]].append(atom[1]) diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index eabd4238..e679a9a1 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -1,3 +1,5 @@ +"""Hyperparameter optimization.""" + import sys import numpy as np import scipy @@ -15,27 +17,27 @@ def hyperparameters(X, y, """ Performs a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). 
Args: - X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - sigma (list): list of kernel width for the grid search - eta (list): list of regularization strength for the grid search - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - splits (int): K number of splits for the Kfold cross-validation - idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) - idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) - printlevel (int): controls level of output printing - adaptative (bool): to expand the grid search adaptatively - read_kernel (bool): if 'X' is a kernel and not an array of representations - sparse (int): the number of reference environnments to consider for sparse regression - random_state (int): the seed used for random number generator (controls train/test splitting) + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + sigma (list): List of kernel width for the grid search. + eta (list): List of regularization strength for the grid search. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + splits (int): K number of splits for the Kfold cross-validation. + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). 
+ printlevel (int): Controls level of output printing. + adaptative (bool): To expand the grid search adaptatively. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sparse (int): The number of reference environnments to consider for sparse regression. + random_state (int): The seed used for random number generator (controls train/test splitting). Returns: The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], - where C is the number of parameter set and - the array is sorted according to MAEs (last is minimum) + where C is the number of parameter set and + the array is sorted according to MAEs (last is minimum) """ def k_fold_opt(K_all, eta): kfold = KFold(n_splits=splits, shuffle=False) diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index 1b2e6157..7f83088c 100644 --- a/qstack/regression/kernel.py +++ b/qstack/regression/kernel.py @@ -1,3 +1,5 @@ +"""Kernel matrix computation.""" + import os import numpy as np from qstack.tools import correct_num_threads @@ -9,12 +11,12 @@ def kernel(X, Y=None, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=def """ Computes a kernel between sets A and B (or A and A) using their representations. Args: - X (numpy.ndarray): Representation of A + X (numpy.ndarray): Representation of A. Y (numpy.ndarray): Representation of B. - sigma (float): width of the kernel - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels + sigma (float): Width of the kernel. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. Returns: A numpy ndarray containing the kernel. 
diff --git a/qstack/regression/kernel_utils.py b/qstack/regression/kernel_utils.py index 84d0eff7..b82bf32e 100644 --- a/qstack/regression/kernel_utils.py +++ b/qstack/regression/kernel_utils.py @@ -1,11 +1,15 @@ +"""Kernel computation utility functions and defaults.""" + import os import argparse import warnings from types import SimpleNamespace import numpy as np +from sklearn.model_selection import train_test_split from .local_kernels import local_kernels_dict from .global_kernels import global_kernels_dict, get_global_K + REGMODULE_PATH = os.path.dirname(__file__) @@ -45,11 +49,11 @@ def get_local_kernel(arg): Args: arg (str): Kernel name. Available options include: - - 'G': Gaussian (RBF) kernel - - 'L': Laplacian kernel - - 'dot': Linear (dot product) kernel - - 'cosine': Cosine similarity kernel - - Implementation-specific variants: 'G_sklearn', 'G_custom_c', 'L_sklearn', 'L_custom_c', 'L_custom_py' + - 'G': Gaussian (RBF) kernel. + - 'L': Laplacian kernel. + - 'dot': Linear (dot product) kernel. + - 'cosine': Cosine similarity kernel. + - Implementation-specific variants: 'G_sklearn', 'G_custom_c', 'L_sklearn', 'L_custom_c', 'L_custom_py'. Returns: callable: Kernel function with signature kernel(X, Y, gamma) -> numpy.ndarray. @@ -98,7 +102,6 @@ def get_kernel(arg, arg2=None): Returns: callable: Kernel function (local or global). 
""" - local_kernel = get_local_kernel(arg) if arg2 is None or arg2[0] is None: @@ -121,10 +124,10 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, Args: y (numpy.1darray(Nsamples)): array containing the target property of all Nsamples - test_size (float or int): test set fraction (or number of samples) - idx_test ([int] / numpy.1darray): list of indices for the test set (based on the sequence in X) - idx_train ([int] / numpy.1darray): list of indices for the training set (based on the sequence in X) - random_state (int): the seed used for random number generator (controls train/test splitting) + test_size (float or int): Test set fraction (or number of samples). + idx_test ([int] / numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train ([int] / numpy.1darray): List of indices for the training set (based on the sequence in X). + random_state (int): The seed used for random number generator (controls train/test splitting). Returns: numpy.1darray(Ntest, dtype=int) : test indices @@ -132,9 +135,6 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, numpy.1darray(Ntest, dtype=float) : test set target property numpy.1darray(Ntrain, dtype=float) : train set target property """ - - from sklearn.model_selection import train_test_split - if idx_test is None and idx_train is None: idx_train, idx_test = train_test_split(np.arange(len(y)), test_size=test_size, random_state=random_state) elif idx_test is not None and idx_train is None: @@ -173,12 +173,10 @@ def sparse_regression_kernel(K_train, y_train, sparse_idx, eta): and y_solve $\mathbf{K}_{MN}\vec y$. Args: - K_train (numpy.1darray(Ntrain1,Ntrain): kernel computed on the training set. - Ntrain1 (N in the equation) may differ from the full training set Ntrain (e.g. a subset) + K_train (numpy.1darray(Ntrain1,Ntrain): Kernel computed on the training set. Ntrain1 (N in the equation) may differ from the full training set Ntrain (e.g. a subset). 
y_train (numpy.1darray(Ntrain)): array containing the target property of the full training set - sparse_idx (numpy.1darray of int) : (M in the equation): sparse subset indices - wrt to the order of the full training set. - eta (float): regularization strength for matrix inversion + sparse_idx (numpy.1darray of int): (M in the equation): sparse subset indices wrt to the order of the full training set. + eta (float): Regularization strength for matrix inversion. Returns: numpy.2darray((len(sparse), len(sparse)), dtype=float) : matrix to be inverted diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index eb3ffcf0..428667a4 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -1,4 +1,12 @@ +"""Local (atomic) kernel implementations. + +Provides: + local_kernels_dict: Dictionary mapping kernel names to their implementations. +""" + import os +import ctypes +import sysconfig import warnings import numpy as np import sklearn.metrics.pairwise as _SKLEARN_PAIRWISE @@ -46,8 +54,6 @@ def custom_C_kernels(kernel_function, return_distance_function=False): Returns: callable or None: Kernel or distance function, or None if C library cannot be loaded. """ - import ctypes - import sysconfig array_2d_double = np.ctypeslib.ndpointer(dtype=np.float64, ndim=2, flags='CONTIGUOUS') lib_path = REGMODULE_PATH[0]+"/lib/manh"+sysconfig.get_config_var('EXT_SUFFIX') diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 8dcc7a64..43694fa0 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -1,3 +1,5 @@ +"""Out-of-sample prediction.""" + import sys import numpy as np from qstack.mathutils.fps import do_fps @@ -13,23 +15,22 @@ def oos(X, X_oos, alpha, sigma=defaults.sigma, """ Perform prediction on an out-of-sample (OOS) set. 
Args: - X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples - X_oos (numpy.ndarray[Noos,...]): array of OOS representations - alpha (numpy.1darray(Ntrain or sparse)): regression weights - sigma (float): width of the kernel - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (list): list of indices for the test set (based on the sequence in X) - idx_train (list): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse regression + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + X_oos (numpy.ndarray[Noos,...]): Array of OOS representations. + alpha (numpy.1darray(Ntrain or sparse)): Regression weights. + sigma (float): Width of the kernel. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (list): List of indices for the test set (based on the sequence in X). + idx_train (list): List of indices for the training set (based on the sequence in X). + sparse (int): The number of reference environnments to consider for sparse regression. 
Returns: np.1darray(Noos) : predictions on the OOS set """ - idx_train, _, _, _, = train_test_split_idx(y=np.arange(len(X)), idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) kernel = get_kernel(akernel, [gkernel, gdict]) diff --git a/qstack/regression/parser.py b/qstack/regression/parser.py index 79a94047..71f72be8 100644 --- a/qstack/regression/parser.py +++ b/qstack/regression/parser.py @@ -1,3 +1,5 @@ +"""Command-line argument parser for regression tasks.""" + import argparse from qstack.tools import FlexParser from .kernel_utils import defaults, ParseKwargs, local_kernels_dict, global_kernels_dict @@ -18,25 +20,25 @@ class RegressionParser(FlexParser): Attributes: Standard arguments added for all modes: - - x (--x): Path to molecular representations file + - x (--x): Path to molecular representations file - y (--y): Path to target properties file - akernel (--akernel): Local/atomic kernel type (Gaussian, Laplacian, etc.) - - gkernel (--gkernel): Global/molecular kernel type (average, REMatch) - - gdict (--gdict): Global kernel parameters dictionary + - gkernel (--gkernel): Global/molecular kernel type (average, REMatch) + - gdict (--gdict): Global kernel parameters dictionary - test (--test): Test set fraction (0.0-1.0) - - train (--train): Training set fraction list for learning curvers - (0.0-1.0 where 1.0 means full training set minus test set) - - ll (--ll): Thread correction flag for running on clusters + - train (--train): Training set fraction list for learning curvers + (0.0-1.0 where 1.0 means full training set minus test set) + - ll (--ll): Thread correction flag for running on clusters - readkernel (--readkernel): Flag if input is pre-computed kernel - sparse (--sparse): Sparse regression basis size - random_state (--random_state): Random seed for reproducibility Additional for 'single' mode: - - eta (--eta): Single regularization parameter + - eta (--eta): Single regularization parameter - sigma (--sigma): 
Single kernel width parameter Additional for 'array' mode: - - eta (--eta): Array of regularization parameters + - eta (--eta): Array of regularization parameters - sigma (--sigma): Array of kernel width parameters - splits (--splits): Number of k-fold cross-validation splits - print (--print): Verbosity level diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index 16908f17..66460359 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -1,3 +1,5 @@ +"""Learning curve computation.""" + import numpy as np import scipy from qstack.mathutils.fps import do_fps @@ -14,29 +16,28 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, """ Produces learning curves (LC) data, for various training sizes, using kernel ridge regression and the user specified parameters Args: - X (numpy.ndarray[Nsamples,...]): array containing the representations of all Nsamples - y (numpy.1darray[Nsamples]): array containing the target property of all Nsamples - read_kernel (bool): if 'X' is a kernel and not an array of representations - sigma (float): width of the kernel - eta (float): regularization strength for matrix inversion - akernel (str): local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine') - gkernel (str): global kernel (None, 'REM', 'avg') - gdict (dict): parameters of the global kernels - test_size (float or int): test set fraction (or number of samples) - train_size (list): list of training set size fractions used to evaluate the points on the LC - n_rep (int): the number of repetition for each point (using random sampling) - random_state (int): the seed used for random number generator (controls train/test splitting) - idx_test (numpy.1darray): list of indices for the test set (based on the sequence in X) - idx_train (numpy.1darray): list of indices for the training set (based on the sequence in X) - sparse (int): the number of reference environnments to consider for sparse 
regression - debug (bool): to use a fixed seed for partial training set selection (for reproducibility) - save_pred (bool): to return all predicted targets + X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. + y (numpy.1darray[Nsamples]): Array containing the target property of all Nsamples. + read_kernel (bool): If 'X' is a kernel and not an array of representations. + sigma (float): Width of the kernel. + eta (float): Regularization strength for matrix inversion. + akernel (str): Local kernel ('L' for Laplacian, 'G' for Gaussian, 'dot', 'cosine'). + gkernel (str): Global kernel (None, 'REM', 'avg'). + gdict (dict): Parameters of the global kernels. + test_size (float or int): Test set fraction (or number of samples). + train_size (list): List of training set size fractions used to evaluate the points on the LC. + n_rep (int): The number of repetition for each point (using random sampling). + random_state (int): The seed used for random number generator (controls train/test splitting). + idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). + idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). + sparse (int): The number of reference environnments to consider for sparse regression. + debug (bool): To use a fixed seed for partial training set selection (for reproducibility). + save_pred (bool): To return all predicted targets. 
Returns: The computed LC, as a list containing all its points (train size, MAE, std) If save_pres is True, a tuple with (results, (target values, predicted values)) """ - idx_train, idx_test, y_train, y_test = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) if read_kernel is False: diff --git a/qstack/reorder.py b/qstack/reorder.py index 7290e41a..0b68bf50 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -1,3 +1,5 @@ +"""Functions for reordering atomic orbitals between different conventions.""" + import numpy as np @@ -63,7 +65,6 @@ def _pyscf2gpr_idx(l): Returns: tuple: Re-arranged indices array and sign array. """ - idx = np.arange(len(l)) i=0 while(i < len(idx)): @@ -93,7 +94,6 @@ def reorder_ao(mol, vector, src='pyscf', dest='gpr'): NotImplementedError: If the specified convention is not implemented. ValueError: If vector dimension is not 1 or 2. """ - def get_idx(l, m, convention): convention = convention.lower() if convention == 'gpr': @@ -107,6 +107,7 @@ def get_idx(l, m, convention): raise NotImplementedError(errstr) from .compound import basis_flatten + _, l, m = basis_flatten(mol, return_both=False) idx_src, sign_src = get_idx(l, m, src) idx_dest, sign_dest = get_idx(l, m, dest) diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index fcf1b82f..93e05814 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -1,11 +1,21 @@ +"""Laikov-Briling 2020 guess Hamiltonian implementation.""" + import copy import numpy as np from pyscf import data, df, scf class LB2020guess: - """See https://github.com/briling/aepm.""" + """Laikov-Briling 2020 guess Hamiltonian implementation. + + Reference: + D. N. Laikov, K. R. Briling, + "Atomic effective potentials for starting molecular electronic structure calculations", + Theor. Chem. Acc. 139, 17 (2020), doi:10.1007/s00214-019-2521-3. 
+ Implements the atomic effective potential method for initial guess generation. + See https://github.com/briling/aepm for a C implementation. + """ def __init__(self, fname=None, parameters='HF'): self.acfile_default = './parameters_HF.dat' self.Qmax = 102 @@ -40,7 +50,8 @@ def read_ac(self, fname): fname (str, optional): Path to parameter file. If None, uses default. Returns: - dict: Dictionary mapping element symbols to basis function parameters. + dict: Dictionary mapping element symbols to lists of basis function + parameters [[l, [exponent, coefficient]], ...]. """ if fname is None: fname = self.acfile_default @@ -64,22 +75,25 @@ def add_caps(self, basis): """Adds cap (diffuse) functions to the auxiliary basis. Args: - basis (dict): Basis set dictionary to modify. + basis (dict): Basis set dictionary to modify in-place. Returns: - dict: Modified basis set with cap functions added. + None. Modifies basis in-place. """ for q in range(1, self.Qmax+1): - a = self.caps_array[q] + a = self._caps_array[q] qname = data.elements.ELEMENTS[q] if qname in basis: basis[qname].append( [0, [a, self.renormalize(a) ]] ) - return basis + return def get_basis(self, fname, parameters): """Initializes auxiliary basis set from file or predefined parameters. + Loads basis set from either predefined HF/HFS parameters or custom file, + then adds cap functions and stores in self.acbasis. + Args: fname (str, optional): Path to custom parameter file. parameters (str): Parameter set to use ('HF', 'HFS', or None for custom file). @@ -90,25 +104,27 @@ def get_basis(self, fname, parameters): self.acbasis = acbasis self.parameters = None elif parameters=='HF': - acbasis = self.hf_basis + acbasis = self._hf_basis self.add_caps(acbasis) self.acbasis = acbasis self.parameters = 'HF' elif parameters=='HFS': - self.acbasis = self.hfs_basis + self.acbasis = self._hfs_basis self.parameters = 'HFS' def use_charge(self, mol): """Adjusts basis coefficients based on molecular charge. 
+ For charged molecules with HF parameters, scales the cap function + coefficient to account for charge redistribution. + Args: - mol (pyscf Mole): pyscf Mole object. + mol (pyscf.gto.Mole): pyscf Mole object. Returns: - dict: Adjusted auxiliary basis set. + dict: Adjusted auxiliary basis set for molecule's elements. """ - acbasis = {q: copy.deepcopy(self.acbasis[q]) for q in map(mol.atom_symbol, range(mol.natm))} if self.parameters == 'HF': factor = 1.0-mol.charge/mol.natm @@ -120,12 +136,15 @@ def use_charge(self, mol): def use_ecp(self, mol, acbasis): """Adjusts basis set to account for effective core potentials (ECP). + When ECP is present, removes basis functions corresponding to core electrons + by reducing coefficients proportionally until core charge is accounted for. + Args: - mol (pyscf Mole): pyscf Mole object with ECP. + mol (pyscf.gto.Mole): pyscf Mole object potentially with ECP. acbasis (dict): Auxiliary basis set dictionary. Returns: - dict: Adjusted auxiliary basis set accounting for ECP. + dict: Adjusted auxiliary basis set with core electrons removed if ECP present. """ if not mol.has_ecp(): return acbasis @@ -159,13 +178,16 @@ def use_ecp(self, mol, acbasis): def get_auxweights(self, auxmol): - """Extracts auxiliary basis weights from auxiliary molecule object. + """Extracts auxiliary basis weights from the basis. + + Collects the coefficients from each auxiliary basis primitive + into a single array aligned with auxiliary orbital indices. Args: - auxmol (pyscf Mole): Auxiliary molecule object. + auxmol (pyscf.gto.Mole): Molecule object with auxiliary basis. Returns: - numpy ndarray: Array of auxiliary basis function weights. + numpy.ndarray: Array of auxiliary basis function weights (length nao). """ w = np.zeros(auxmol.nao) iao = 0 @@ -180,12 +202,15 @@ def get_auxweights(self, auxmol): def merge_caps(self, w, eri3c): """Contracts 3-center integrals with auxiliary basis weights. 
+ PySCF internally renormalizes basis functions, thus ignores the charge normalization + of the auxiliary basis, and the weights must be used directly later. + Args: - w (numpy ndarray): Auxiliary basis weights. - eri3c (numpy ndarray): 3-center electron repulsion integrals. + w (numpy.ndarray): Auxiliary basis weights. + eri3c (numpy.ndarray): 3-center electron repulsion integrals (ij|P). Returns: - numpy ndarray: Contracted integrals. + numpy.ndarray: Contracted integrals (ij) = sum_P w_P * (ij|P). """ return np.einsum('...i,i->...', eri3c, w) @@ -194,11 +219,12 @@ def get_eri3c(self, mol, auxmol): """Computes 3-center electron repulsion integrals. Args: - mol (pyscf Mole): Main molecule object. - auxmol (pyscf Mole): Auxiliary molecule object. + mol (pyscf.gto.Mole): Main molecule object. + auxmol (pyscf.gto.Mole): Auxiliary molecule object. Returns: - numpy ndarray: 3-center ERIs (ij|P) where i,j are AO indices and P is aux basis index. + numpy.ndarray: 3-center ERIs (ij|P) where i,j are primary AO indices + and P is auxiliary basis index. """ pmol = mol + auxmol shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) @@ -207,6 +233,18 @@ def get_eri3c(self, mol, auxmol): def check_coefficients(self, mol, acbasis): + """Validates that auxiliary basis coefficients sum to correct total charge. + + Ensures basis set modifications (charge adjustment, ECP) maintain + consistency with molecular electronic structure. + + Args: + mol (pyscf.gto.Mole): Molecule object. + acbasis (dict): Auxiliary basis set dictionary. + + Raises: + RuntimeError: If coefficient sum doesn't match expected charge. 
+ """ ch1 = sum(sum(c/self.renormalize(a) for _, (a, c) in acbasis[mol.atom_pure_symbol(iat)]) for iat in range(mol.natm)) ch2 = sum(mol.atom_charges()) - (mol.charge if self.parameters == 'HF' else 0) if not np.isclose(ch1, ch2): @@ -214,6 +252,14 @@ def check_coefficients(self, mol, acbasis): def HLB20(self, mol): + """Computes the LB2020 effective potential matrix. + + Args: + mol (pyscf.gto.Mole): Molecule object. + + Returns: + numpy.ndarray: LB2020 potential matrix in AO basis (nao x nao). + """ acbasis = self.use_charge(mol) acbasis = self.use_ecp(mol, acbasis) self.check_coefficients(mol, acbasis) @@ -224,6 +270,16 @@ def HLB20(self, mol): def Heff(self, mol): + """Constructs one-electron Hamiltonian for initial guess. + + Combines standard core Hamiltonian with LB2020 effective potential. + + Args: + mol (pyscf.gto.Mole): Molecule object. + + Returns: + numpy.ndarray: Effective Hamiltonian matrix in AO basis (nao x nao). + """ self.mol = mol self.Hcore = scf.hf.get_hcore(mol) self.H = self.Hcore + self.HLB20(mol) @@ -231,6 +287,19 @@ def Heff(self, mol): def HLB20_ints_generator(self, mol, auxmol): + """Creates generator for LB2020 potential gradients. + + Computes derivative integrals and returns a function that evaluates + the gradient of LB2020 potential with respect to atomic positions. + + Args: + mol (pyscf.gto.Mole): Molecule object. + auxmol (pyscf.gto.Mole): Auxiliary molecule object. + + Returns: + callable: Function that takes atom index and returns gradient integrals + as numpy.ndarray of shape (3, nao, nao, naux). + """ pmol = mol + auxmol shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) @@ -249,6 +318,15 @@ def HLB20_ints_deriv(iat): def HLB20_generator(self, mol): + """Creates generator for LB2020 potential gradient contributions. + + Args: + mol (pyscf.gto.Mole): Molecule object. 
+ + Returns: + callable: Function that takes atom index and returns LB2020 potential + gradient as numpy.ndarray of shape (3, nao, nao). + """ acbasis = self.use_charge(mol) acbasis = self.use_ecp(mol, acbasis) self.check_coefficients(mol, acbasis) @@ -261,23 +339,28 @@ def HLB20_deriv(iat): def init_data(self): - self.caps_array = np.zeros(self.Qmax+1) - self.caps_array [ 1 : 2 +1] = 1.0 / 3.0 - self.caps_array [ 3 : 4 +1] = 1.0 / 16.0 - self.caps_array [ 5 : 10 +1] = 1.0 / 3.0 - self.caps_array [ 11 : 12 +1] = 1.0 / 32.0 - self.caps_array [ 13 : 18 +1] = 1.0 / 8.0 - self.caps_array [ 19 : 20 +1] = 1.0 / 32.0 - self.caps_array [ 21 : 30 +1] = 1.0 / 6.0 - self.caps_array [ 31 : 36 +1] = 1.0 / 12.0 - self.caps_array [ 37 : 38 +1] = 1.0 / 32.0 - self.caps_array [ 39 : 48 +1] = 1.0 / 8.0 - self.caps_array [ 49 : 54 +1] = 1.0 / 12.0 - self.caps_array [ 55 : 70 +1] = 1.0 / 32.0 - self.caps_array [ 71 : 86 +1] = 1.0 / 12.0 - self.caps_array [ 87 : 102 +1] = 1.0 / 32.0 - - self.hfs_basis = {'H': [[0, [0.0815877135278, 0.03846658840144482]]], + """Set parameters: + - self._caps_array: Diffuse function exponents for each element. + - self._hf_basis: Predefined HF parameter set for all elements. + - self._hfs_basis: Predefined HFS parameter set for all elements. 
+ """ + self._caps_array = np.zeros(self.Qmax+1) + self._caps_array [ 1 : 2 +1] = 1.0 / 3.0 + self._caps_array [ 3 : 4 +1] = 1.0 / 16.0 + self._caps_array [ 5 : 10 +1] = 1.0 / 3.0 + self._caps_array [ 11 : 12 +1] = 1.0 / 32.0 + self._caps_array [ 13 : 18 +1] = 1.0 / 8.0 + self._caps_array [ 19 : 20 +1] = 1.0 / 32.0 + self._caps_array [ 21 : 30 +1] = 1.0 / 6.0 + self._caps_array [ 31 : 36 +1] = 1.0 / 12.0 + self._caps_array [ 37 : 38 +1] = 1.0 / 32.0 + self._caps_array [ 39 : 48 +1] = 1.0 / 8.0 + self._caps_array [ 49 : 54 +1] = 1.0 / 12.0 + self._caps_array [ 55 : 70 +1] = 1.0 / 32.0 + self._caps_array [ 71 : 86 +1] = 1.0 / 12.0 + self._caps_array [ 87 : 102 +1] = 1.0 / 32.0 + + self._hfs_basis = {'H': [[0, [0.0815877135278, 0.03846658840144482]]], 'He': [[0, [0.808048051263, 0.42950970838920094]]], 'Li': [[0, [2.60255347642, 0.9236581585938292]], [0, [0.0280604557276, 0.02092188631196157]]], 'Be': [[0, [4.59692793038, 1.5671644720955082]], [0, [0.0804833286681, 0.07687177344753668]]], @@ -381,7 +464,7 @@ def init_data(self): 'No': [[0, [9692.47931286, 489.2442112346279]], [0, [672.034303671, 214.23197488829172]], [0, [105.988550516, 117.63038685999115]], [0, [18.5442146559, 70.92646079341314]], [0, [2.7932429022, 16.425417198806798]], [0, [0.577107028367, 2.4645969790388342]], [0, [0.0432042120982, 0.07161106318788321]]], } - self.hf_basis = {'H': [], + self._hf_basis = {'H': [], 'He': [[0, [1.8865345899608519, 0.4056146926108746]]], 'Li': [[0, [1.9854870701524918, 0.842937532901041]]], 'Be': [[0, [4.744586184977778, 1.3574437702689057]], [0, [0.2792470137084066, 0.12818229520909]]], diff --git a/qstack/spahm/__init__.py b/qstack/spahm/__init__.py index 4c1185e4..3a33238a 100644 --- a/qstack/spahm/__init__.py +++ b/qstack/spahm/__init__.py @@ -1,2 +1,3 @@ +"""SPAHM (spectrum of approximated Hamiltonian matrices representations) module.""" + from . import compute_spahm -#from . 
import rho diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index e36d4bf9..5a14b804 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -1,3 +1,5 @@ +"""Eigenvalue SPAHM computation.""" + import numpy as np from pyscf import scf, grad from .guesses import solveF, get_guess, get_occ, eigenvalue_grad, get_guess_g @@ -16,11 +18,11 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): Returns: tuple: Depending on return_ao_dip: - - If False: (e, v) where: - - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues - - v (numpy ndarray): 2D array (nao, nao) of MO coefficients - - If True: (e, v, ao_dip) where ao_dip is 3D array (3, nao, nao) of AO dipole integrals - if field is not None, else None + - If False: (e, v) where: + - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues. + - v (numpy ndarray): 2D array (nao, nao) of MO coefficients. + - If True: (e, v, ao_dip) where ao_dip is 3D array (3, nao, nao) of AO dipole integrals + if field is not None, else None. Raises: NotImplementedError: If field is specified with Hückel guess. @@ -60,7 +62,6 @@ def ext_field_generator(mol, field): 3D array (3, nao, nao) of dH_ext/dr[iat] - external field Hamiltonian gradient for atom iat. """ - shls_slice = (0, mol.nbas, 0, mol.nbas) with mol.with_common_orig((0,0,0)): int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) @@ -92,12 +93,11 @@ def get_guess_orbitals_grad(mol, guess, field=None): Returns: tuple: (e, de_dr, de_dfield) where: - - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues in Eh - - de_dr (numpy ndarray): 3D array (nao, natm, 3) of eigenvalue gradients in Eh/bohr - - de_dfield (numpy ndarray or None): 2D array (nao, 3) of eigenvalue derivatives - w.r.t. electric field in Eh/a.u., or None if field is None + - e (numpy ndarray): 1D array (nao,) of orbital eigenvalues in Eh. 
+ - de_dr (numpy ndarray): 3D array (nao, natm, 3) of eigenvalue gradients in Eh/bohr. + - de_dfield (numpy ndarray or None): 2D array (nao, 3) of eigenvalue derivatives + w.r.t. electric field in Eh/a.u., or None if field is None. """ - e, c, ao_dip = get_guess_orbitals(mol, guess[0], field=field, return_ao_dip=True) mf = grad.rhf.Gradients(scf.RHF(mol)) s1 = mf.get_ovlp(mol) @@ -130,8 +130,8 @@ def get_spahm_representation(mol, guess_in, xc="pbe", field=None): Returns: numpy ndarray: SPAHM representation consisting of occupied orbital eigenvalues. - - Closed-shell: 1D array of shape (n_occupied,) in Eh - - Open-shell: 2D array of shape (2, n_alpha) for alpha and beta orbitals (padded by zeros) + - Closed-shell: 1D array of shape (n_occupied,) in Eh. + - Open-shell: 2D array of shape (2, n_alpha) for alpha and beta orbitals (padded by zeros). """ guess = get_guess(guess_in) e, _v = get_guess_orbitals(mol, guess, xc, field=field) @@ -153,12 +153,12 @@ def get_spahm_representation_grad(mol, guess_in, field=None): Returns: tuple: (spahm, spahm_grad, spahm_field_grad) where: - - spahm (numpy ndarray): SPAHM representation - occupied orbital energies in Eh. - Shape: (n_occ,) for closed-shell or (2, n_alpha) for open-shell - - spahm_grad (numpy ndarray): Nuclear gradients of SPAHM in Eh/bohr. - Shape: (n_occ, natm, 3) or (2, n_alpha, natm, 3) - - spahm_field_grad (numpy ndarray or None): Electric field gradients in Eh/a.u. - Shape: (n_occ, 3) or (2, n_alpha, 3), or None if field is None + - spahm (numpy ndarray): SPAHM representation - occupied orbital energies in Eh. + Shape: (n_occ,) for closed-shell or (2, n_alpha) for open-shell. + - spahm_grad (numpy ndarray): Nuclear gradients of SPAHM in Eh/bohr. + Shape: (n_occ, natm, 3) or (2, n_alpha, natm, 3). + - spahm_field_grad (numpy ndarray or None): Electric field gradients in Eh/a.u. + Shape: (n_occ, 3) or (2, n_alpha, 3), or None if field is None. 
""" guess = get_guess_g(guess_in) e, agrad, fgrad = get_guess_orbitals_grad(mol, guess, field=field) diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index 29759726..c7441bd0 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -1,3 +1,11 @@ +"""Initial guess Hamiltonian methods for SPAHM. + +Implements various guess methods: Hcore, Hückel, GWH, SAD, SAP, LB2020. + +Provides: + - guesses_dict: Dictionary mapping guess names to functions. +""" + import warnings import numpy as np import scipy @@ -9,11 +17,11 @@ def hcore(mol, *_): """Computes guess Hamiltonian from core contributions (kinetic + nuclear + ECP). Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - numpy ndarray: 2D array containing the core Hamiltonian matrix in AO basis. + numpy ndarray: 2D array containing the core Hamiltonian matrix in AO basis. """ return scf.hf.get_hcore(mol) @@ -21,7 +29,7 @@ def hcore(mol, *_): def GWH(mol, *_): """Computes guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. - Uses the empirical formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij with K = 1.75. + Uses the formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij with K = 1.75. Reference: M. Wolfsberg, L. Helmholtz, @@ -29,13 +37,12 @@ def GWH(mol, *_): J. Chem. Phys. 20 837-843 (1952), doi:10.1063/1.1700580. Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - numpy ndarray: 2D GWH Hamiltonian matrix in AO basis. + numpy ndarray: 2D GWH Hamiltonian matrix in AO basis. 
""" - h = hcore(mol).diagonal() S = mol.intor_symmetric('int1e_ovlp') K = 1.75 @@ -44,18 +51,18 @@ def GWH(mol, *_): return h_gwh -def SAD(mol, func): +def SAD(mol, xc): """Computes guess Hamiltonian using Superposition of Atomic Densities (SAD). Constructs the Fock matrix from atomic Hartree-Fock density matrices summed together as an initial guess for molecular calculations. Args: - mol (pyscf Mole): pyscf Mole object. - func (str): Exchange-correlation functional name (e.g., 'pbe', 'b3lyp'). + mol (pyscf Mole): pyscf Mole object. + xc (str): Exchange-correlation functional. Returns: - numpy ndarray: 2D Fock matrix in AO basis computed from SAD. + numpy ndarray: 2D Fock matrix in AO basis computed from SAD. Warns: RuntimeWarning: If alpha and beta effective potentials differ for the functional. @@ -63,14 +70,14 @@ def SAD(mol, func): hc = hcore(mol) dm = scf.hf.init_guess_by_atom(mol) mf = dft.RKS(mol) - mf.xc = func + mf.xc = xc vhf = mf.get_veff(dm=dm) if vhf.ndim == 2: fock = hc + vhf else: fock = hc + vhf[0] if not np.array_equal(vhf[0], vhf[1]): - msg = f'The effective potential ({func}) returned different alpha and beta matrix components from atomicHF DM' + msg = f'The effective potential ({xc}) returned different alpha and beta matrix components from atomicHF DM' warnings.warn(msg, RuntimeWarning, stacklevel=2) return fock @@ -81,11 +88,11 @@ def SAP(mol, *_): Constructs initial Hamiltonian from kinetic energy plus summed atomic potentials. Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - numpy ndarray: 2D Hamiltonian matrix (T + V_SAP) in AO basis. + numpy ndarray: 2D Hamiltonian matrix (T + V_SAP) in AO basis. 
""" mf = dft.RKS(mol) vsap = mf.get_vsap() @@ -97,14 +104,12 @@ def SAP(mol, *_): def LB(mol, *_): """Computes guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. - Uses auxiliary basis representation optimized for Hartree-Fock calculations. - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - numpy ndarray: 2D effective Hamiltonian matrix from LB2020 model in AO basis. + numpy ndarray: 2D effective Hamiltonian matrix from LB2020 model in AO basis. """ return LB20(parameters='HF').Heff(mol) @@ -112,14 +117,12 @@ def LB(mol, *_): def LB_HFS(mol, *_): """Computes guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. - Uses auxiliary basis representation optimized for Hartree-Fock-Slater calculations. - Args: - mol (pyscf Mole): pyscf Mole object. - *_: Unused positional arguments (for interface compatibility). + mol (pyscf Mole): pyscf Mole object. + *_: Unused positional arguments (for interface compatibility). Returns: - numpy ndarray: 2D effective Hamiltonian matrix from LB2020-HFS model in AO basis. + numpy ndarray: 2D effective Hamiltonian matrix from LB2020-HFS model in AO basis. """ return LB20(parameters='HFS').Heff(mol) @@ -128,11 +131,11 @@ def solveF(mol, fock): """Solves generalized eigenvalue problem FC = SCε for the Fock/Hamiltonian matrix. Args: - mol (pyscf Mole): pyscf Mole object. - fock (numpy ndarray): 2D Fock or Hamiltonian matrix in AO basis. + mol (pyscf Mole): pyscf Mole object. + fock (numpy ndarray): 2D Fock or Hamiltonian matrix in AO basis. Returns: - tuple: (eigenvalues, eigenvectors) where: + tuple: (eigenvalues, eigenvectors) where: - eigenvalues: 1D array of orbital energies - eigenvectors: 2D array of MO coefficients (columns are MOs) """ @@ -144,20 +147,20 @@ def get_guess(arg): """Returns guess Hamiltonian function by name. 
Args: - arg (str): Guess method name. Available options: - - 'core': Core Hamiltonian (H_core) - - 'sad': Superposition of Atomic Densities - - 'sap': Superposition of Atomic Potentials - - 'gwh': Generalized Wolfsberg-Helmholtz - - 'lb': Laikov-Briling 2020 (HF parameters) - - 'lb-hfs': Laikov-Briling 2020 (HFS parameters) - - 'huckel': Extended Hückel method + arg (str): Guess method name. Available options: + - 'core': Core Hamiltonian (H_core). + - 'sad': Superposition of Atomic Densities. + - 'sap': Superposition of Atomic Potentials. + - 'gwh': Generalized Wolfsberg-Helmholtz. + - 'lb': Laikov-Briling 2020 (HF parameters). + - 'lb-hfs': Laikov-Briling 2020 (HFS parameters). + - 'huckel': Extended Hückel method. Returns: - callable: Guess Hamiltonian function with signature f(mol, xc) -> numpy.ndarray. + callable: Guess Hamiltonian function with signature f(mol, xc) -> numpy.ndarray. Raises: - RuntimeError: If the specified guess method is not available. + RuntimeError: If the specified guess method is not available. """ arg = arg.lower() if arg not in guesses_dict: @@ -189,12 +192,13 @@ def get_occ(e, nelec, spin): """Extracts occupied orbital eigenvalues/energies. Args: - e (numpy ndarray): Full array of orbital eigenvalues. - nelec (tuple): Number of (alpha, beta) electrons. - spin (int or None): Spin multiplicity. If None, assumes closed-shell. + e (numpy ndarray): Full array of orbital eigenvalues (1D) + or possibly arrays of larger dimensionality. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Spin multiplicity. If None, assumes closed-shell. Returns: - numpy ndarray: Occupied eigenvalues. Shape depends on spin: + numpy ndarray: Occupied eigenvalues. Shape depends on spin: - Closed-shell (spin=None): 1D array of occupied eigenvalues - Open-shell: 2D array (2, nocc) for alpha and beta separately """ @@ -204,7 +208,7 @@ def get_occ(e, nelec, spin): return e[:nocc,...] 
else: nocc = nelec - e1 = np.zeros((2, *e.shape))[:,:nocc[0],...] + e1 = np.zeros((2, nocc[0], *e.shape[1:])) e1[0,:nocc[0],...] = e[:nocc[0],...] e1[1,:nocc[1],...] = e[:nocc[1],...] return e1 @@ -214,16 +218,15 @@ def get_dm(v, nelec, spin): """Constructs density matrix from occupied molecular orbitals. Args: - v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. - nelec (tuple): Number of (alpha, beta) electrons. - spin (int or None): Spin multiplicity. If None, assumes closed-shell (RHF). + v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. + nelec (tuple): Number of (alpha, beta) electrons. + spin (int or None): Spin multiplicity. If None, assumes closed-shell (RHF). Returns: - numpy ndarray: Density matrix in AO basis. + numpy ndarray: Density matrix in AO basis. - Closed-shell: 2D array (nao, nao) - Open-shell: 3D array (2, nao, nao) for alpha and beta """ - check_nelec(nelec, len(v)) if spin is None: nocc = nelec[0] @@ -235,19 +238,19 @@ def get_dm(v, nelec, spin): dm1 = v[:,:nocc[1]] @ v[:,:nocc[1]].T return np.array((dm0,dm1)) -############################################################################### def hcore_grad(mf): """Returns core Hamiltonian gradient generator function. Args: - mf: Mean-field object with hcore_generator method. + mf: PySCF mean-field object. Returns: callable: Function that returns core Hamiltonian gradient for a given atom. """ return mf.hcore_generator(mf.mol) + def LB_grad(mf): """Returns Laikov-Briling Hamiltonian gradient generator function. @@ -265,6 +268,7 @@ def H_grad(iat): return hcore_grad(iat) + HLB_grad(iat) return H_grad + def get_guess_g(arg): """Returns both guess Hamiltonian function and its gradient generator. @@ -283,6 +287,7 @@ def get_guess_g(arg): raise RuntimeError(f'Unknown guess. 
Available guesses: {list(guesses.keys())}') return guesses[arg] + def eigenvalue_grad(mol, e, c, s1, h1): """Computes nuclear gradients of orbital eigenvalues from generalized eigenvalue problem HC = eSC. @@ -292,7 +297,7 @@ def eigenvalue_grad(mol, e, c, s1, h1): mol (pyscf Mole): pyscf Mole object. e (numpy ndarray): 1D array (nao,) of orbital eigenvalues. c (numpy ndarray): 2D array (nao, nao) of MO coefficients (eigenvectors). - s1 (numpy ndarray): 3D array (3, nao, nao) - compact gradient of overlap matrix. + s1 (numpy ndarray): 3D array (3, nao, nao) - gradient of overlap matrix. h1 (callable): Function returning dH/dr[iat] - Hamiltonian gradient for atom iat. Returns: diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index 0cae23bc..34a9c74a 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -1,3 +1,5 @@ +"""Wigner d-matrices for real spherical harmonics to symmetrize coefficient vectors.""" + import numpy as np from numpy import sqrt @@ -14,7 +16,7 @@ def c_split(mol, c): Returns: list: List of [l, coefficients] pairs where l is angular momentum and - coefficients is the subset of c for that angular momentum shell. + coefficients is the subset of c for that angular momentum shell. """ cs = [] i0 = 0 @@ -55,7 +57,7 @@ def new_xy_axis(z): Returns: numpy ndarray: 3x3 rotation matrix with rows [x', y', z'] defining the - new orthonormal coordinate system. + new orthonormal coordinate system. """ z = z/np.linalg.norm(z) # don't use /= so a copy of z is created i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z @@ -81,7 +83,7 @@ def Dmatrix(xyz, lmax, order='xyz'): Returns: list: List of numpy ndarrays D[l] where D[l] is the (2l+1) x (2l+1) real Wigner - D-matrix for angular momentum l. Note: m1 index is rotated (D is transposed). + D-matrix for angular momentum l. Note: m1 index is rotated (D is transposed). Raises: NotImplementedError: If lmax > 4. 
@@ -89,7 +91,6 @@ def Dmatrix(xyz, lmax, order='xyz'): Note: The matrices are computed using explicit algebraic expressions for each l. """ - xx = xyz[0,0]; xy = xyz[0,1]; xz = xyz[0,2] yx = xyz[1,0]; yy = xyz[1,1]; yz = xyz[1,2] zx = xyz[2,0]; zy = xyz[2,1]; zz = xyz[2,2] diff --git a/qstack/spahm/rho/__init__.py b/qstack/spahm/rho/__init__.py index e69de29b..f4346753 100644 --- a/qstack/spahm/rho/__init__.py +++ b/qstack/spahm/rho/__init__.py @@ -0,0 +1 @@ +"""Atom- and bond-based SPAHM module.""" diff --git a/qstack/spahm/rho/__main__.py b/qstack/spahm/rho/__main__.py index f81982ec..d8745bad 100644 --- a/qstack/spahm/rho/__main__.py +++ b/qstack/spahm/rho/__main__.py @@ -1,4 +1,7 @@ +"""Command-line entry point for SPAHM(a,b) computation.""" + from .compute_rho_spahm import main + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/atom.py b/qstack/spahm/rho/atom.py index 826d3e11..291d4cb2 100644 --- a/qstack/spahm/rho/atom.py +++ b/qstack/spahm/rho/atom.py @@ -1,3 +1,5 @@ +"""Legacy command-line entry point for SPAHM(a) computations.""" + import numpy as np from qstack import compound from .compute_rho_spahm import get_repr diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index 447859f0..c15f5059 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -1,3 +1,5 @@ +"""Atomic density computation.""" + import numpy as np from qstack import compound, fields from . import lowdin @@ -22,8 +24,8 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): Returns: list or numpy ndarray: Density fitting coefficients for each atom. 
- - If short=False: list of 1D arrays (full aux basis per atom) - - If short=True: 1D array (concatenated atom-centered coefficients only) + - If short=False: list of 1D arrays (full aux basis per atom) + - If short=True: 1D array (concatenated atom-centered coefficients only) """ L = lowdin.Lowdin_split(mol, dm) diff --git a/qstack/spahm/rho/bond.py b/qstack/spahm/rho/bond.py index b7ea520b..0eaeba04 100644 --- a/qstack/spahm/rho/bond.py +++ b/qstack/spahm/rho/bond.py @@ -1,3 +1,5 @@ +"""Legacy command-line entry point for SPAHM(b) computations.""" + import os import numpy as np from qstack.tools import correct_num_threads diff --git a/qstack/spahm/rho/bond_selected.py b/qstack/spahm/rho/bond_selected.py index 277fe6c0..dca2640b 100644 --- a/qstack/spahm/rho/bond_selected.py +++ b/qstack/spahm/rho/bond_selected.py @@ -1,3 +1,5 @@ +"""Representation for a specific bond in a molecule.""" + import os import numpy as np from . import utils, dmb_rep_bond as dmbb, lowdin @@ -23,7 +25,7 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, guess (str): Guess Hamiltonian method name. Defaults to defaults.guess. xc (str): Exchange-correlation functional. Defaults to defaults.xc. spin (numpy ndarray, optional): Array of numbers of unpaired electrons per molecule. Defaults to None. - cutoff (float): Maximum bond distance in Angstrom. Defaults to defaults.cutoff. + cutoff (float): Maximum bond distance in Å. Defaults to defaults.cutoff. printlevel (int): Verbosity level. Defaults to 0. omods (list): Open-shell modes (e.g. 'alpha', 'beta'). Defaults to defaults.omod. bpath (str): Path to bond basis set directory. Defaults to defaults.bpath. @@ -33,7 +35,6 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, Returns: list: List of (filename, representation) tuples for each specified bond. 
""" - if spin is None or (spin == None).all(): omods = [None] diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 71560b03..c8439414 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -1,3 +1,5 @@ +"""Main computation routines for SPAHM(a,b) representations.""" + import os import itertools import numpy as np @@ -25,7 +27,7 @@ def spahm_a_b(rep_type, mols, dms, mols (list): List of pyscf Mole objects. dms (list): List of density matrices (2D or 3D numpy arrays) for each molecule. bpath (str): Directory path containing bond-optimized basis files (.bas) for SPAHM(b). Defaults to defaults.bpath. - cutoff (float): Bond cutoff distance in Angstrom for SPAHM(b). Defaults to defaults.cutoff. + cutoff (float): Bond cutoff distance in Å for SPAHM(b). Defaults to defaults.cutoff. omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. elements (list, optional): Element symbols present in dataset. Auto-detected if None. Defaults to None. only_m0 (bool): Use only m=0 angular momentum component for SPAHM(b). Defaults to False. 
@@ -40,10 +42,10 @@ def spahm_a_b(rep_type, mols, dms, Returns: numpy ndarray: 4D array (n_omods, n_mols, max_atoms, n_features) where: - - n_omods: Number of open-shell components (1 for closed-shell, len(omods) for open-shell) - - n_mols: Number of molecules in dataset - - max_atoms: Maximum number of atoms/bonds across all molecules - - n_features: Representation dimension + - n_omods: Number of open-shell components (1 for closed-shell, len(omods) for open-shell) + - n_mols: Number of molecules in dataset + - max_atoms: Maximum number of atoms/bonds across all molecules + - n_features: Representation dimension """ maxlen = 0 if only_z is None: @@ -120,7 +122,7 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= dump_and_exit (bool): Save atom pair file for SPAHM(b) to pairfile and exit without computing. Defaults to False. same_basis (bool): Use generic CC.bas for all atom pairs for SPAHM(b). Defaults to False. bpath (str): Directory path containing bond-optimized basis files (.bas) for SPAHM(b). Defaults to defaults.bpath. - cutoff (float): Bond cutoff distance in Angstrom for SPAHM(b). Defaults to defaults.cutoff. + cutoff (float): Bond cutoff distance in Å for SPAHM(b). Defaults to defaults.cutoff. omods (list): Open-shell modes ('alpha', 'beta', 'sum', 'diff'). Defaults to defaults.omod. elements (list, optional): Element symbols in dataset. Auto-detected if None. Defaults to None. only_m0 (bool): Use only m=0 angular momentum component for SPAHM(b). Defaults to False. 
@@ -135,11 +137,11 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= Returns: numpy ndarray: Representation array with shape depending on options: - - Base: (n_omods, n_mols, max_atoms, n_features) - - If split=False: (n_omods, total_atoms, n_features) - all molecules concatenated - - If merge=True: Features concatenated, omods dimension removed - - If with_symbols=True: Object array with (symbol, vector) tuples per atom - - If split=True and with_symbols=True: List format per molecule + - Base: (n_omods, n_mols, max_atoms, n_features) + - If split=False: (n_omods, total_atoms, n_features) - all molecules concatenated + - If merge=True: Features concatenated, omods dimension removed + - If with_symbols=True: Object array with (symbol, vector) tuples per atom + - If split=True and with_symbols=True: List format per molecule """ if not dump_and_exit: dms = utils.mols_guess(mols, xyzlist, guess, xc=xc, spin=spin, readdm=readdm, printlevel=printlevel) diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index 802e9d26..d645fa2b 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -1,3 +1,11 @@ +"""Functions for SPAHM(a) computation. + +Implements various models: pure, SAD-diff, occupation-corrected, Löwdin partitioning. + +Provides: + - models_dict: Dictionary of available models. +""" + import numpy as np import pyscf from qstack import compound, fields @@ -16,10 +24,10 @@ def get_basis_info(atom_types, auxbasis): Returns: tuple: (ao, ao_len, idx, M) where: - - ao (dict): Angular momentum info per element - - ao_len (dict): Basis set size per element - - idx (dict): Pair indices for symmetrization per element - - M (dict): Metric matrices per element + - ao (dict): Angular momentum info per element. + - ao_len (dict): Basis set size per element. + - idx (dict): Pair indices for symmetrization per element. + - M (dict): Metric matrices (2D numpy ndarray) per element. 
""" ao = {} idx = {} @@ -34,28 +42,42 @@ def get_basis_info(atom_types, auxbasis): def _make_models_dict(): + """Creates dictionary of available SPAHM(a) models. + + Defines density fitting functions for each model. + + Returns: + dict: Mapping model names to (density_fitting_function, symmetrization_function). + """ def df_pure(mol, dm, auxbasis): + """Pure density fitting without modifications.""" return fields.decomposition.decompose(mol, dm, auxbasis)[1] def df_sad_diff(mol, dm, auxbasis): + """Density fitting on difference from superposition of atomic densities (SAD).""" mf = pyscf.scf.RHF(mol) dm_sad = mf.init_guess_by_atom(mol) dm = dm - dm_sad return fields.decomposition.decompose(mol, dm, auxbasis)[1] def df_lowdin_long(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning with block-diagonal slicing with contributions from other elements.""" return atomic_density.fit(mol, dm, auxbasis, only_i=only_i) def df_lowdin_short(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning with block-diagonal slicing.""" return atomic_density.fit(mol, dm, auxbasis, short=True, only_i=only_i) def df_lowdin_long_x(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning with contributions from other elements.""" return atomic_density.fit(mol, dm, auxbasis, w_slicing=False, only_i=only_i) def df_lowdin_short_x(mol, dm, auxbasis, only_i=None): + """Löwdin partitioning.""" return atomic_density.fit(mol, dm, auxbasis, short=True, w_slicing=False, only_i=only_i) def df_occup(mol, dm, auxbasis): + """Pure density fitting with preserving atom charges.""" L = lowdin.Lowdin_split(mol, dm) diag = np.diag(L.dmL) Q = np.array([sum(diag[start:stop]) for (start, stop) in mol.aoslice_nr_by_atom()[:,2:]]) @@ -82,16 +104,34 @@ def get_model(arg): Args: arg (str): Model name. 
Available options: - 'pure': Pure density fitting - - 'sad-diff': Superposition of Atomic Densities difference - - 'occup': Occupation-corrected - - 'lowdin-short': Short Löwdin partitioning - - 'lowdin-long': Long Löwdin partitioning - - 'lowdin-short-x': Short Löwdin without slicing - - 'lowdin-long-x': Long Löwdin without slicing - - 'mr2021': Method from Margraf & Reuter 2021 + - 'occup': Occupation-corrected density fitting. + - 'sad-diff': Superposition of Atomic Densities difference. + - 'lowdin-short': Short Löwdin partitioning with slicing. + - 'lowdin-long': Long Löwdin partitioning with slicing. + - 'lowdin-short-x': Short Löwdin. + - 'lowdin-long-x': Long Löwdin. + - 'mr2021': Method from Margraf & Reuter 2021. Returns: tuple: (density_fitting_function, symmetrization_function) pair. + - density_fitting_function (callable): Function performing density fitting. + Args: + mol (pyscf Mole): Molecule object. + dm (numpy ndarray): Density matrix (2D). + auxbasis (str or dict): Auxiliary basis set. + Returns: + c (numpy ndarray or list): Density fitting coefficients (1D). + - symmetrization_function (callable): Function for symmetrizing coefficients. + Args: + c (numpy ndarray): Density fitting coefficients (1D). + mol (pyscf Mole): Molecule object. + idx (dict): Pair indices per element. + ao (dict): Angular momentum info per element. + ao_len (dict): Basis set sizes per element. + M (dict): Metric matrices per element (2D numpy ndarrays). + atom_types (list): All element types in dataset. + Returns: + v (list or numpy ndarray): Symmetrized atomic feature vectors. Raises: RuntimeError: If model name is not recognized. @@ -105,8 +145,10 @@ def get_model(arg): def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): """Symmetrizes density fitting coefficients using MR2021 method. - Implementation of the method from J. T. Margraf and K. Reuter, - Nat. Commun. 12, 344 (2021). + Reference: + J. T. Margraf, K. 
Reuter, + "Pure non-local machine-learned density functional theory for electron correlation", + Nat. Commun. 12, 344 (2021), doi:10.1038/s41467-020-20471-y. Args: c (numpy ndarray): Concatenated density fitting coefficients. @@ -120,7 +162,6 @@ def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): Returns: list: Symmetrized vectors for each atom. """ - # J. T. Margraf and K. Reuter, Nat. Commun. 12, 344 (2021). v = [] i0 = 0 for q in mol.elements: @@ -131,9 +172,9 @@ def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): - """Symmetrizes coefficients for short Löwdin and related models. + """Symmetrizes coefficients for each atom. - Applies metric matrix transformation and pads to consistent length. + For each atom, use contributions from the said atom. Args: c (numpy ndarray): Density fitting coefficients. @@ -147,7 +188,6 @@ def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): Returns: numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. """ - # short lowdin / everything else v = [] i0 = 0 for q in mol.elements: @@ -160,9 +200,9 @@ def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): def coefficients_symmetrize_long(c_df, mol, idx, ao, ao_len, M, atom_types): - """Symmetrizes coefficients for long Löwdin partitioning. + """Symmetrizes coefficients for long Löwdin models. - Handles per-atom coefficient lists from Löwdin splitting. + For each atom, use contributions from the said atom as well as all other atoms. Args: c_df (list): List of coefficient arrays per atom. @@ -174,9 +214,8 @@ def coefficients_symmetrize_long(c_df, mol, idx, ao, ao_len, M, atom_types): atom_types (list): All element types in dataset. Returns: - list: Symmetrized vectors for each atom. + list: Symmetrized vectors (numpy ndarrays) for each atom. 
""" - # long lowdin vectors = [] for c_a in c_df: v_atom = {q: np.zeros(len(idx[q])) for q in atom_types} diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index 9cce9be4..d95d8c06 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -1,3 +1,5 @@ +"""Functions for SPAHM(b) computation.""" + import operator from ast import literal_eval import numpy as np @@ -10,8 +12,6 @@ def make_bname(q0, q1): """Creates canonical bond name from two element symbols. - Orders elements alphabetically to ensure consistent naming (e.g., 'CH' not 'HC'). - Args: q0 (str): First element symbol. q1 (str): Second element symbol. @@ -33,8 +33,8 @@ def get_basis_info(qqs, mybasis, only_m0, printlevel): Returns: tuple: (idx, M) where: - - idx (dict): Pair indices for each bond type - - M (dict): Metric matrices for each bond type + - idx (dict): Pair indices for each bond type (list of [i, j] pairs) + - M (dict): Metric matrices for each bond type (numpy 2D ndarray) """ idx = {} M = {} @@ -81,8 +81,8 @@ def get_element_pairs(elements): Returns: tuple: (qqs, qqs4q) where: - - qqs (list): Sorted list of unique bond pair names - - qqs4q (dict): Maps each element to its list of possible bond partners + - qqs (list): Sorted list of unique bond pair names + - qqs4q (dict): Maps each element to its list of possible bond partners """ qqs = [] qqs4q = {} @@ -106,14 +106,14 @@ def get_element_pairs_cutoff(elements, mols, cutoff, align=False): Args: elements (list): List of element symbols to consider. mols (list): List of pyscf Mole objects. - cutoff (float): Maximum bond distance in Angstrom. + cutoff (float): Maximum bond distance in Å. align (bool): If True, includes all element pairs regardless of distance. Defaults to False. 
Returns: tuple: (qqs, qqs4q) where: - - qqs (list): Sorted list of bond pair names found within cutoff - - qqs4q (dict): Maps each element to its list of bond partners + - qqs (list): Sorted list of bond pair names found within cutoff + - qqs4q (dict): Maps each element to its list of bond partners """ qqs4q = {q: [] for q in elements} qqs = [] @@ -145,6 +145,25 @@ def get_element_pairs_cutoff(elements, mols, cutoff, align=False): def read_basis_wrapper_pairs(mols, bondidx, bpath, only_m0, printlevel, same_basis=False): + """Reads basis sets and computes metric matrices for specified bond pairs. + + Processes bond pairs from molecular structures and loads their corresponding + basis sets from disk, then computes basis indices and metric matrices. + + Args: + mols (list): List of pyscf Mole objects. + bondidx (list): List of bond index pairs [(i0, i1), ...] for each molecule. + bpath (str): Directory path containing basis set files. + only_m0 (bool): If True, use only m=0 angular momentum components. + printlevel (int): Verbosity level for output (>1 for detailed printing). + same_basis (bool): If True, uses generic CC.bas for all pairs. Defaults to False. + + Returns: + tuple: (mybasis, idx, M) where: + - mybasis (dict): Bond name to basis set dictionary mapping, + - idx (dict): Pair indices (list of [i, j] pairs) for each bond type, + - M (dict): Metric matrices (2D numpy ndarray) for each bond type. + """ qqs0 = [make_bname(*map(mol.atom_symbol, bondij)) for (bondij, mol) in zip(bondidx, mols, strict=True)] qqs0 = sorted(set(qqs0)) if printlevel>1: @@ -155,6 +174,34 @@ def read_basis_wrapper_pairs(mols, bondidx, bpath, only_m0, printlevel, same_bas def read_basis_wrapper(mols, bpath, only_m0, printlevel, cutoff=None, elements=None, pairfile=None, dump_and_exit=False, same_basis=False): + """Reads basis sets for all element pairs present in molecules. 
+ + Determines which element pairs exist (either all possible or within cutoff distance), + loads corresponding basis sets, and computes metric matrices and indices. + Can cache pair information to file for subsequent runs. + + Args: + mols (list): List of pyscf Mole objects to analyze. + bpath (str): Directory path containing basis set files. + only_m0 (bool): If True, use only m=0 angular momentum components. + printlevel (int): Verbosity level for output (>1 for detailed printing). + cutoff (float, optional): Maximum bond distance in Å for pair detection. + If None, considers all element combinations. Defaults to None. + elements (list, optional): List of element symbols to consider. If None, + extracts all elements from molecules. Defaults to None. + pairfile (str, optional): Path to save/load element pair information. Defaults to None. + dump_and_exit (bool): If True, saves pair information and exits. Defaults to False. + same_basis (bool): If True, uses generic CC.bas for all pairs. Defaults to False. + + Returns: + tuple: (elements, mybasis, qqs, qqs4q, idx, M) where: + - elements (list): Sorted list of element symbols. + - mybasis (dict): Bond name to basis set dictionary mapping. + - qqs (dict): Maps each element to list of all bond pair names. + - qqs4q (dict): Maps each element to its specific bond partners. + - idx (dict): Pair indices (list of [i, j] pairs) for each bond type. + - M (dict): Metric matrices (2D numpy ndarray) for each bond type. + """ if elements is None: elements = sorted({q for mol in mols for q in mol.elements}) @@ -177,7 +224,22 @@ def read_basis_wrapper(mols, bpath, only_m0, printlevel, cutoff=None, elements=N idx, M = get_basis_info(qqs0, mybasis, only_m0, printlevel) return elements, mybasis, qqs, qqs4q, idx, M + def bonds_dict_init(qqs, M): + """Initializes storage for bond representations. 
+ + Creates a dictionary with zero-initialized arrays for each bond type, + with array sizes matching the corresponding metric matrix dimensions. + + Args: + qqs (list): List of bond pair names (e.g., ['CC', 'CH', 'OH']). + M (dict): Dictionary mapping bond names to metric matrices. + + Returns: + tuple: (mybonds, N) where: + - mybonds (dict): Bond name to zero-initialized numpy array mapping. + - N (int): Total number of basis functions across all bond types. + """ N = 0 mybonds = {} for qq in qqs: @@ -188,6 +250,20 @@ def bonds_dict_init(qqs, M): def fit_dm(dm, mol, mybasis, ri0, ri1): + """Fits density matrix using auxiliary basis functions at bond center. + + Decomposes the bond density matrix into auxiliary basis coefficients + centered at the bond midpoint, then splits coefficients by angular momentum. + + Args: + dm (numpy.ndarray): Density matrix for the bond. + mol (pyscf.gto.Mole): Molecule object containing the bond. + mybasis (dict): Basis set dictionary for the bond type. + ri0 (numpy.ndarray): Coordinates of first atom in Å. + ri1 (numpy.ndarray): Coordinates of second atom in Å. + + Returns: + list: Coefficients split by angular momentum quantum number [(l, coeff), ...].""" rm = (ri0+ri1)*0.5 atom = f"No {rm[0]} {rm[1]} {rm[2]}" auxmol = gto.M(atom=atom, basis=mybasis) @@ -198,6 +274,21 @@ def fit_dm(dm, mol, mybasis, ri0, ri1): def vec_from_cs(z, cs, lmax, idx): + """Rotates basis coefficients to bond axis and creates vectorized representation. + + Applies Wigner D-matrix rotation to align coefficients with the bond vector + (same as pretending the bond is along the z-axis), + ensuring rotational invariance, then vectorizes using symmetry indices. + + Args: + z (numpy.ndarray): Bond vector (displacement from one atom to another). + cs (list): Angular momentum decomposed coefficients [(l, coeff), ...]. + lmax (int): Maximum angular momentum quantum number. + idx (dict): Pair indices mapping for symmetrization. 
+ + Returns: + numpy.ndarray: Rotationally invariant vectorized representation of the bond. + """ D = Dmatrix_for_z(z, lmax) c_new = rotate_c(D, cs) v = sym.vectorize_c(idx, c_new) @@ -208,7 +299,7 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): """Computes bond representation for a specific atom pair. Extracts bond density, fits it with basis functions at the bond center, - and rotates coefficients to bond axis to create rotationally invariant representation. + and symmetrizes the representation from both atom perspectives. Args: i0 (int): Index of first atom. @@ -217,15 +308,15 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): mybasis (dict): Bond basis sets keyed by bond names. idx (dict): Pair indices for symmetrization. q (list): Element symbols for all atoms. - r (numpy ndarray): Atomic coordinates in Angstrom. + r (numpy ndarray): Atomic coordinates in Å. cutoff (float): Maximum bond distance. Returns: tuple: ([v0, v1], bname) where: - - v0: Representation from atom i0's perspective - - v1: Representation from atom i1's perspective - - bname: Bond name (e.g., 'CH') - Returns (None, None) if distance exceeds cutoff. + - v0: Representation from atom i0's perspective. + - v1: Representation from atom i1's perspective. + - bname: Bond name (e.g., 'CH'). + Returns (None, None) if distance exceeds cutoff. """ q0, q1 = q[i0], q[i1] r0, r1 = r[i0], r[i1] @@ -242,7 +333,27 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): def repr_for_mol(mol, dm, qqs, M, mybasis, idx, maxlen, cutoff, only_z=None): + """Computes SPAHM(b) representations for all atoms in a molecule. + + Constructs bond-based atomic representations by summing contributions from + all bonds of the same type within cutoff distance. + Args: + mol (pyscf.gto.Mole): Molecule object. + dm (numpy.ndarray): Molecular density matrix. + qqs (dict): Maps each element to list of bond pair names it can form. + M (dict): Metric matrices for each bond type. 
+ mybasis (dict): Bond basis sets keyed by bond names. + idx (dict): Pair indices for symmetrization of each bond type. + maxlen (int): Maximum representation length for zero-padding. + cutoff (float): Maximum bond distance in Å to consider. + only_z (list, optional): If provided, compute representations only for atoms + with these element symbols. Defaults to None (all atoms). + + Returns: + numpy.ndarray: Array of shape (n_atoms, maxlen) containing atom representations, + where each row is a zero-padded SPAHM(b) vector. + """ if only_z is None: only_z = [] diff --git a/qstack/spahm/rho/lowdin.py b/qstack/spahm/rho/lowdin.py index 76518de4..0d761c61 100644 --- a/qstack/spahm/rho/lowdin.py +++ b/qstack/spahm/rho/lowdin.py @@ -1,5 +1,8 @@ +"""Löwdin orthogonalization for density matrix partitioning.""" + import numpy as np + class Lowdin_split: """Löwdin orthogonalization for density matrix partitioning. @@ -14,7 +17,6 @@ class Lowdin_split: dm (numpy ndarray): Original density matrix in AO basis. dmL (numpy ndarray): Löwdin-orthogonalized density matrix. """ - def __init__(self, mol, dm): """Initializes Löwdin split with molecule and density matrix. diff --git a/qstack/spahm/rho/parser.py b/qstack/spahm/rho/parser.py index 931d5890..528d2604 100644 --- a/qstack/spahm/rho/parser.py +++ b/qstack/spahm/rho/parser.py @@ -1,3 +1,5 @@ +"""Command-line argument parser for SPAHM(a,b) main functions.""" + import argparse from qstack.tools import FlexParser from .utils import defaults, omod_fns_dict diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index 12ce739c..cd27ce14 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -1,3 +1,5 @@ +"""Symmetry operations for SPAHM(a,b) representations.""" + import numpy as np from qstack import compound from qstack.mathutils.matrix import sqrtm @@ -7,7 +9,7 @@ def idxl0(i, l, ao): """Returns index of basis function with same L and N quantum numbers but M=0. 
- Finds the m=0 component of the same angular momentum shell for normalization. + Finds the m=0 component of the same angular momentum shell. Args: i (int): Basis function index. @@ -17,12 +19,12 @@ def idxl0(i, l, ao): Returns: int: Index of corresponding m=0 basis function. """ - # return the index of the basis function with the same L and N but M=0 if l != 1: return i - ao['m'][i]+l else: return i + [0, 2, 1][ao['m'][i]] + def get_S(q, basis): """Computes overlap matrix and angular momentum info for an atom. @@ -30,13 +32,13 @@ def get_S(q, basis): Args: q (str): Element symbol. - basis (str or dict): Basis set specification. + basis (str or dict): Basis set. Returns: tuple: (S, ao, ao_start) where: - - S (numpy ndarray): Overlap matrix - - ao (dict): Angular momentum info with 'l' and 'm' lists - - ao_start (list): Starting indices for each angular momentum shell + - S (numpy ndarray): Overlap matrix + - ao (dict): Angular momentum info with 'l' and 'm' lists for each AO + - ao_start (list): Starting indices for each angular momentum shell """ mol = compound.make_atom(q, basis) S = mol.intor_symmetric('int1e_ovlp') @@ -72,6 +74,18 @@ def store_pair_indices(ao): def store_pair_indices_short(ao, ao_start): + """Stores basis function pair indices for m=0 components only. + + Creates list of (i,j) pairs using only the first basis function (m=0) + of each angular momentum shell, for compact representation. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + ao_start (list): Starting indices for each angular momentum shell. + + Returns: + list: List of [i, j] index pairs for m=0 components with matching L. + """ idx = [] for i in ao_start: for j in ao_start: @@ -84,6 +98,21 @@ def store_pair_indices_short(ao, ao_start): def metric_matrix(q, idx, ao, S): + """Computes metric matrix for symmetrization of density fitting coefficients. 
+ + Constructs metric matrix from overlap integrals of basis function pairs, + normalized by angular momentum degeneracy (2l+1). Returns square root + for transformation to orthonormal representation. + + Args: + q (str): Element symbol key for angular momentum info. + idx (list): List of [i, j] basis function pair indices. + ao (dict): Angular momentum info dict with nested structure ao[q]. + S (numpy ndarray): Overlap matrix. + + Returns: + numpy ndarray: Square root of metric matrix. + """ N = len(idx) A = np.zeros((N,N)) for p in range(N): @@ -109,7 +138,7 @@ def metric_matrix_short(idx, ao, S): S (numpy ndarray): Overlap matrix. Returns: - numpy ndarray: Square root of metric matrix for normalization. + numpy ndarray: Square root of metric matrix. """ N = len(idx) A = np.zeros((N,N)) @@ -144,6 +173,24 @@ def vectorize_c(idx, c): def vectorize_c_MR2021(idx_pair, ao, c): + """Vectorizes coefficients using MR2021 scheme. + + Reference: + J. T. Margraf, K. Reuter, + "Pure non-local machine-learned density functional theory for electron correlation", + Nat. Commun. 12, 344 (2021), doi:10.1038/s41467-020-20471-y. + + Computes simplified rotationally invariant representation by contracting coefficients + within each angular momentum shell. + + Args: + idx_pair (list): List of [i, j] basis function pair indices. + ao (dict): Angular momentum info with 'l' and 'm' keys. + c (numpy ndarray): 1D array of density fitting coefficients. + + Returns: + numpy ndarray: 1D array of contracted coefficient norms per shell. + """ idx = sorted(set(np.array(idx_pair)[:,0])) v = np.zeros(len(idx)) for p,i in enumerate(idx): @@ -154,6 +201,18 @@ def vectorize_c_MR2021(idx_pair, ao, c): def vectorize_c_short(idx, ao, c): + """Vectorizes coefficients using short format with shell-wise dot products. + + Computes representation by contracting coefficient vectors of angular momentum shells. + + Args: + idx (list): List of [i, j] basis function pair indices (shell starts). 
+ ao (dict): Angular momentum info with 'l' and 'm' keys. + c (numpy ndarray): 1D array of density fitting coefficients. + + Returns: + numpy ndarray: 1D array of shell-pair dot products. + """ v = np.zeros(len(idx)) for p, [i,j] in enumerate(idx): l = ao['l'][i] @@ -163,6 +222,17 @@ def vectorize_c_short(idx, ao, c): def store_pair_indices_z(ao): + """Stores basis function pairs with matching |m| quantum numbers. + + Creates list of all (i,j) pairs where basis functions have equal + absolute values of magnetic quantum number m. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + + Returns: + list: List of [i, j] index pairs with |m_i| = |m_j|. + """ idx = [] for i, mi in enumerate(ao['m']): for j, mj in enumerate(ao['m']): @@ -173,6 +243,15 @@ def store_pair_indices_z(ao): def store_pair_indices_z_only0(ao): + """Stores basis function pairs restricted to m=0 components only. + + Creates list of all (i,j) pairs where both basis functions have m=0. + + Args: + ao (dict): Angular momentum info with 'l' and 'm' keys. + + Returns: + list: List of [i, j] index pairs where both m_i = m_j = 0.""" idx = [] for i, mi in enumerate(ao['m']): if mi!=0: @@ -185,6 +264,19 @@ def store_pair_indices_z_only0(ao): def metric_matrix_z(idx, ao, S): + """Computes metric matrix for z-axis symmetric representations. + + Constructs metric matrix accounting for m and -m degeneracy. Matrix + elements are nonzero only when angular momenta match and m quantum + numbers satisfy m_i=m_j AND m_i1=m_j1, or m_i=-m_j AND m_i1=-m_j1. + + Args: + idx (list): List of [i, j] basis function pair indices. + ao (dict): Angular momentum info with 'l' and 'm' keys. + S (numpy ndarray): Overlap matrix. 
+ + Returns: + numpy ndarray: Square root of metric matrix for z-symmetric normalization.""" N = len(idx) A = np.zeros((N,N)) for p in range(N): diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index ca8dbcae..e1b828e8 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -1,3 +1,10 @@ +"""Utility functions for SPAHM(a,b) computation and default settings. + +Provides: + defaults: Default parameters for SPAHM(a,b) computation. + omod_fns_dict: Dictionary of density matrix modification functions for open-shell systems. +""" + import os import warnings import numpy as np @@ -7,6 +14,7 @@ from qstack.spahm import guesses from qstack import compound + defaults = SimpleNamespace( guess='LB', model='Lowdin-long-x', @@ -59,10 +67,10 @@ def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, xyzlist (list): List of XYZ filenames. charge (list or None): List of molecular charges (or None for neutral). spin (list or None): List of spin multiplicities (or None for default). - basis (str or dict): Basis set specification. + basis (str or dict): Basis set. printlevel (int): Verbosity level (0=silent). Defaults to 0. units (str): Coordinate units ('ANG' or 'BOHR'). Defaults to 'ANG'. - ecp (str or dict, optional): Effective core potential specification. Defaults to None. + ecp (str or dict, optional): Effective core potential. Defaults to None. progress (bool): If True, shows progress bar. Defaults to False. srcdir (str, optional): Source directory prepended to XYZ filenames. Defaults to None. 
@@ -106,15 +114,15 @@ def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, pri dms = [] guess = guesses.get_guess(guess) if spin is None: - spin = [None] *len(xyzlist) + spin = [None]*len(xyzlist) for xyzfile, mol, sp in zip(xyzlist, mols, spin, strict=True): if printlevel>0: print(xyzfile, flush=True) if readdm is None: _e, v = spahm.get_guess_orbitals(mol, guess, xc=xc) - dm = guesses.get_dm(v, mol.nelec, mol.spin if sp is not None else None) + dm = guesses.get_dm(v, mol.nelec, mol.spin if sp is not None else None) else: - dm = np.load(readdm+'/'+os.path.basename(xyzfile)+'.npy') + dm = np.load(f'{readdm}/{os.path.basename(xyzfile)}.npy') if spin and dm.ndim==2: dm = np.array((dm/2,dm/2)) dms.append(dm) @@ -124,29 +132,55 @@ def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, pri def dm_open_mod(dm, omod): - """Applies open-shell modification to density matrix. + """Treats density matrix according to the open-shell mode. Args: dm (numpy ndarray): Density matrix (2D for closed-shell, 3D for open-shell). - omod (str or None): Open-shell modification type. Options in omod_fns_dict. + omod (str or None): Open-shell mode. Options in omod_fns_dict. Returns: numpy ndarray: Modified density matrix. Raises: - ValueError: If omod is not a valid modification type. + ValueError: If omod is not a valid modification type. + RuntimeError: If dm is 3D but omod is None, or if dm is 2D but omod is not None.
""" - omod_fns_dict[None] = lambda dm: dm - if omod in omod_fns_dict: - return omod_fns_dict[omod](dm) - else: - raise ValueError(f'unknown open-shell mod: must be in {list(omod_fns_dict.keys())}, None if the system is closed-shell') + if omod is None: + if dm.ndim==3: + raise RuntimeError('Density matrix is open-shell (3D) but omod is None') + elif dm.ndim==2: + return dm + elif dm.ndim == 2: + raise RuntimeError('Density matrix is closed-shell (2D) but omod is not None') + if omod not in omod_fns_dict: + raise ValueError(f'unknown open-shell mode: must be in {list(omod_fns_dict.keys())}, None if the system is closed-shell') + return omod_fns_dict[omod](dm) def get_xyzlist(xyzlistfile): + """Load list of paths to files. + + Args: + xyzlistfile (str): Path to the file containing list of XYZ filenames. + + Returns: + numpy ndarray: Array of XYZ filenames as strings. + """ return np.loadtxt(xyzlistfile, dtype=str, ndmin=1) + def check_data_struct(fin, local=False): + """Check the structure of a representation file. + + Args: + fin (str): Input file path. + local (bool): If True, checks for local representations. + + Returns: + tuple: (is_single (bool), is_labeled (bool)) + is_single: True if the file contains a single representation. + is_labeled: True if the representations are labeled. + """ x = np.load(fin, allow_pickle=True) if type(x.flatten()[0]) is str or type(x.flatten()[0]) is np.str_: is_labeled = True @@ -167,26 +201,26 @@ def check_data_struct(fin, local=False): return is_single, is_labeled - def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, local=True, sum_local=False, printlevel=0, progress=False, file_format=None): - ''' - A function to load representations from txt-list/npy files. 
- Args: - - f_in: the name of the input file - - from_list(bool): if the input file is a txt-file containing a list of paths to the representations - - srcdir(str) : the path prefix to be at the begining of each file in `f_in`, defaults to cwd - - with_label(bool): saves a list of tuple (filename, representation) - - local(bool): if the representations is local - - sum_local(bool): if local=True then sums the local components - - printlevel(int): level of verbosity - - progress(bool): if True shows progress-bar - - file_format(dict): (for "experienced users" only) structure of the input data, defaults to structure auto determination - Return: - np.array with shape (N,M) where N number of representations M dimmensionality of the representation - OR tuple ([N],np.array(N,M)) containing list of labels and np.array of representations - ''' + """Load representations from disk. + + Args: + f_in (str): Path to the input file. + from_list (bool): If the input file is a text file containing a list of paths to the representations. + srcdir (str) : The path prefix to be at the beginning of each file in `f_in`. Defaults to current working directory. + with_labels (bool): If return atom type labels along with the representations. + local (bool): If the representations are local (per-atom) or global (per-molecule). + sum_local (bool): Sums the local components into a global representation, only if local=True. + printlevel (int): Verbosity level. + progress (bool): If shows a progress bar. + file_format (dict): Structure of the input data, with keys=('is_labeled', 'is_single'). + Defaults to structure auto determination (for "experienced users" only). + + Returns: + np.array with shape (N_representations, N_features), or a tuple containing a list of atomic labels and said np.array.
+ """ if file_format is None: # Do not use mutable data structures for argument defaults file_format = {'is_labeled':None, 'is_single':None} if srcdir is None: @@ -247,7 +281,18 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, else: return reps + def regroup_symbols(file_list, print_level=0, trim_reps=False): + """Regroups representations by atom type. + + Args: + file_list (list): List of representation files. + print_level (int): Verbosity level. Defaults to 0. + trim_reps (bool): If True, trims zeros from representations. Defaults to False. + + Returns: + dict: Dictionary with atom types as keys and lists of representations as values. + """ reps, atoms = load_reps(file_list, from_list=True, with_labels=True, local=True, printlevel=print_level) if print_level > 0: print(f"Extracting {len(atoms)} atoms from {file_list}:") diff --git a/qstack/tools.py b/qstack/tools.py index cc7972ae..ef0006df 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -1,3 +1,7 @@ +"""Utility functions and classes for Q-stack. + +Provides decorators, argument parsers, and helper functions for command-line tools.""" + import os import time import resource @@ -69,7 +73,6 @@ class FlexParser(argparse.ArgumentParser): **kwargs: Arguments passed to ArgumentParser. """ - def remove_argument(self, arg): """Removes an argument from the parser. 
diff --git a/tests/test_compound.py b/tests/test_compound.py index a5ec367b..c429ad8b 100755 --- a/tests/test_compound.py +++ b/tests/test_compound.py @@ -37,7 +37,7 @@ def test_rotate_molecule(): def test_mol_to_xyz(): path = os.path.dirname(os.path.realpath(__file__)) molpath = path+'/data/H2O_saved.xyz' - with open(molpath, 'r') as f: + with open(molpath) as f: xyz0 = f.read().strip() mol = compound.xyz_to_mol(molpath, 'def2svp', charge=0, spin=0) xyz = compound.mol_to_xyz(mol, '/dev/null') diff --git a/tests/test_excited.py b/tests/test_excited.py index 5fb1cade..d15988a9 100755 --- a/tests/test_excited.py +++ b/tests/test_excited.py @@ -14,8 +14,8 @@ def test_excited(): coeff = np.load(xyzfile+'.mo.npy') X = np.load(xyzfile+'.X.npy') x_c = np.load(xyzfile+'.st2_transition_fit.npy') - hole_d = np.load(xyzfile+'.st2_dm_hole.npy') - part_d = np.load(xyzfile+'.st2_dm_part.npy') + hole_d0 = np.load(xyzfile+'.st2_dm_hole.npy') + part_d0 = np.load(xyzfile+'.st2_dm_part.npy') hole_c = np.load(xyzfile+'.st2_dm_hole_fit.npy') part_c = np.load(xyzfile+'.st2_dm_part_fit.npy') @@ -23,18 +23,22 @@ def test_excited(): x_ao = fields.excited.get_transition_dm(mol, X[state_id], coeff) dip = fields.moments.first(mol, x_ao) dip0 = np.array([ 0.68927353, -2.10714637, -1.53423419]) - assert(np.linalg.norm(dip-dip0)<1e-8) + assert(np.allclose(dip, dip0, atol=1e-8)) + + hole_d, part_d = fields.excited.get_holepart(mol, X[state_id], coeff) + assert(np.allclose(hole_d, hole_d0, atol=1e-8)) + assert(np.allclose(part_d, part_d0, atol=1e-8)) auxmol = compound.make_auxmol(mol, 'ccpvqz jkfit') dip = fields.moments.first(auxmol, x_c) dip0 = np.array([-0.68919144, 2.10692116, 1.53399871]) - assert(np.linalg.norm(dip-dip0)<1e-8) + assert(np.allclose(dip, dip0, atol=1e-8)) dist, hole_extent, part_extent = fields.excited.exciton_properties(mol, hole_d, part_d) - assert(np.linalg.norm(np.array([dist, hole_extent, part_extent])-np.array([2.59863354, 7.84850017, 5.67617426]))<1e-7) + 
assert(np.allclose([dist, hole_extent, part_extent], [2.59863354, 7.84850017, 5.67617426], atol=1e-7)) dist, hole_extent, part_extent = fields.excited.exciton_properties(auxmol, hole_c, part_c) - assert(np.linalg.norm(np.array([dist, hole_extent, part_extent])-np.array([2.59940378, 7.8477511, 5.67541635]))<1e-7) + assert(np.allclose([dist, hole_extent, part_extent], [2.59940378, 7.8477511, 5.67541635], atol=1e-7)) def test_excited_frag(): diff --git a/tests/test_fitting.py b/tests/test_fitting.py index c908d60c..f7bc4bc5 100755 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -39,9 +39,11 @@ def test_fitting_error(): c0 = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') error0 = 4.876780263884939e-05 auxmol = compound.make_auxmol(mol, 'cc-pvdz jkfit') - eri2c = auxmol.intor('int2c2e_sph') + _, eri2c, eri3c = decomposition.get_integrals(mol, auxmol) self_repulsion = decomposition.get_self_repulsion(mol, dm) - error = decomposition.decomposition_error(self_repulsion, c0, eri2c) + error = decomposition.optimal_decomposition_error(self_repulsion, c0, eri2c) + assert(np.allclose(error, error0)) + error = decomposition.decomposition_error(self_repulsion, c0, eri2c, eri3c, dm) assert(np.allclose(error, error0)) diff --git a/tests/test_moments.py b/tests/test_moments.py old mode 100644 new mode 100755 From 88d7df91cb02aae1ce901c5ca43705a9f1e5c2b2 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Fri, 7 Nov 2025 16:02:25 +0100 Subject: [PATCH 15/23] Fixup 9530f5b5 --- qstack/mathutils/array.py | 2 +- tests/test_spahm.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py index 4d760255..35c42164 100644 --- a/qstack/mathutils/array.py +++ b/qstack/mathutils/array.py @@ -86,7 +86,7 @@ def vstack_padding(xs): if len({x.ndim for x in xs}) > 1: raise ValueError("All input arrays must have the same number of dimensions.") shapes_common_axis, shapes_other_axes = np.split(np.array([x.shape for x in 
xs]), (1,), axis=1) - if len(np.unique(shapes_other_axes, axis=0))>1: + if len(np.unique(shapes_other_axes, axis=0))==1: return np.vstack(xs) X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0))) idx = 0 diff --git a/tests/test_spahm.py b/tests/test_spahm.py index 6ad7c6ac..eb96619a 100755 --- a/tests/test_spahm.py +++ b/tests/test_spahm.py @@ -4,6 +4,7 @@ import numpy as np from qstack import compound from qstack.spahm import compute_spahm +from qstack.mathutils.array import vstack_padding def test_spahm_GWH(): @@ -71,8 +72,7 @@ def test_generate_reps(): xyzlist = [os.path.join(path,s) for s in sorted(os.listdir(path)) if ".xyz" in s] mols = [compound.xyz_to_mol(f, basis='minao', charge=0, spin=0) for f in xyzlist] xmols = [compute_spahm.get_spahm_representation(mol, 'lb')[0] for mol in mols] - maxlen = max([len(x) for x in xmols]) - X = np.array([np.pad(x, pad_width=(0,maxlen-len(x)), constant_values=0) for x in xmols]) + X = vstack_padding(xmols) Xtrue = np.load(os.path.join(path, 'X_lb.npy')) assert(np.allclose(X, Xtrue)) From b202db11ec4a8af4494c52efdf0ac24890dde4ec Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sun, 9 Nov 2025 17:39:24 +0100 Subject: [PATCH 16/23] Refactor SPAHM(a) - fix maxlen - add test for mr2021 - fix short versions for only_z --- qstack/spahm/rho/atomic_density.py | 6 +- qstack/spahm/rho/compute_rho_spahm.py | 27 +++--- qstack/spahm/rho/dmb_rep_atom.py | 114 +++++++++++++----------- qstack/spahm/rho/sym.py | 30 +++---- qstack/tools.py | 10 +++ tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy | Bin 0 -> 751 bytes tests/test_spahm_a.py | 26 ++++++ 7 files changed, 129 insertions(+), 84 deletions(-) create mode 100644 tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index c15f5059..1f97e27f 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -58,7 +58,11 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, 
only_i=None): if short: cc = [] - for i, c in zip(auxmol.aoslice_by_atom()[:,2:], a_dfs, strict=True): + if only_i is not None and len(only_i) > 0: + aoslice_by_atom = auxmol.aoslice_by_atom()[only_i,2:] + else: + aoslice_by_atom = auxmol.aoslice_by_atom()[:,2:] + for i, c in zip(aoslice_by_atom, a_dfs, strict=True): cc.append(c[i[0]:i[1]]) return np.hstack(cc) diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index c8439414..45be2f99 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -47,9 +47,14 @@ def spahm_a_b(rep_type, mols, dms, - max_atoms: Maximum number of atoms/bonds across all molecules - n_features: Representation dimension """ - maxlen = 0 if only_z is None: only_z = [] + if len(only_z) > 0: + print(f"Selecting atom-types in {only_z}") + natm = max(sum(sum(z==np.array(mol.elements)) for z in only_z) for mol in mols) + else: + natm = max(mol.natm for mol in mols) + if rep_type == 'bond': elements, mybasis, qqs0, qqs4q, idx, M = dmbb.read_basis_wrapper(mols, bpath, only_m0, printlevel, elements=elements, cutoff=cutoff, @@ -62,40 +67,28 @@ def spahm_a_b(rep_type, mols, dms, for mol in mols: elements.update(mol.elements) elements = sorted(set(elements)) - df_wrapper, sym_wrapper = dmba.get_model(model) + df_wrapper, sym_wrapper, maxlen_fn = dmba.get_model(model) ao, ao_len, idx, M = dmba.get_basis_info(elements, auxbasis) - maxlen = sum([len(v) for v in idx.values()]) + maxlen = maxlen_fn(idx, idx.keys() if len(only_z)==0 else only_z) - if len(only_z) > 0: - print(f"Selecting atom-types in {only_z}") - zinmols = [] - for mol in mols: - zinmol = [sum(z == np.array(mol.elements)) for z in only_z] - zinmols.append(sum(zinmol)) - natm = max(zinmols) - else: - natm = max([mol.natm for mol in mols]) - zinmols = [mol.natm for mol in mols] allvec = np.zeros((len(omods), len(mols), natm, maxlen)) for imol, (mol, dm) in enumerate(zip(mols, dms, strict=True)): if printlevel>0: 
print('mol', imol, flush=True) - if len(only_z) >0: + if len(only_z)>0: only_i = [i for i,z in enumerate(mol.elements) if z in only_z] else: only_i = range(mol.natm) for iomod, omod in enumerate(omods): DM = utils.dm_open_mod(dm, omod) - vec = None # This too !!! (maybe a wrapper or dict) if rep_type == 'bond': vec = dmbb.repr_for_mol(mol, DM, qqs, M, mybasis, idx, maxlen, cutoff, only_z=only_z) elif rep_type == 'atom': c_df = df_wrapper(mol, DM, auxbasis, only_i=only_i) - vec = sym_wrapper(c_df, mol, idx, ao, ao_len, M, elements) + vec = sym_wrapper(maxlen, c_df, mol.elements, idx, ao, ao_len, M, only_i) allvec[iomod,imol,:len(vec)] = vec - return allvec diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index d645fa2b..5f502383 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -10,6 +10,7 @@ import pyscf from qstack import compound, fields from . import sym, atomic_density, lowdin +from qstack.tools import slice_generator def get_basis_info(atom_types, auxbasis): @@ -47,13 +48,13 @@ def _make_models_dict(): Defines density fitting functions for each model. Returns: - dict: Mapping model names to (density_fitting_function, symmetrization_function). + dict: Mapping model names to (density_fitting_function, symmetrization_function, maxlen_function). 
""" - def df_pure(mol, dm, auxbasis): + def df_pure(mol, dm, auxbasis, **kwargs): """Pure density fitting without modifications.""" return fields.decomposition.decompose(mol, dm, auxbasis)[1] - def df_sad_diff(mol, dm, auxbasis): + def df_sad_diff(mol, dm, auxbasis, **kwargs): """Density fitting on difference from superposition of atomic densities (SAD).""" mf = pyscf.scf.RHF(mol) dm_sad = mf.init_guess_by_atom(mol) @@ -87,14 +88,23 @@ def df_occup(mol, dm, auxbasis): c = fields.decomposition.correct_N_atomic(auxmol, Q, c0, metric=eri2c) return c - models_dict = {'pure' : [df_pure, coefficients_symmetrize_short ], - 'sad-diff' : [df_sad_diff, coefficients_symmetrize_short ], - 'occup' : [df_occup, coefficients_symmetrize_short ], - 'lowdin-short' : [df_lowdin_short, coefficients_symmetrize_short ], - 'lowdin-long' : [df_lowdin_long, coefficients_symmetrize_long ], - 'lowdin-short-x': [df_lowdin_short_x, coefficients_symmetrize_short ], - 'lowdin-long-x' : [df_lowdin_long_x, coefficients_symmetrize_long ], - 'mr2021' : [df_pure, coefficients_symmetrize_MR2021]} + def maxlen_long(idx, _): + return sum(len(v) for v in idx.values()) + + def maxlen_short(idx, elements): + return max(len(idx[q]) for q in elements) + + def maxlen_MR2021(idx, elements): + return max(len(np.unique(idx[q][:,0])) for q in elements) + + models_dict = {'pure' : (df_pure, coefficients_symmetrize_short , maxlen_short ), + 'sad-diff' : (df_sad_diff, coefficients_symmetrize_short , maxlen_short ), + 'occup' : (df_occup, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-short' : (df_lowdin_short, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-long' : (df_lowdin_long, coefficients_symmetrize_long , maxlen_long ), + 'lowdin-short-x': (df_lowdin_short_x, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-long-x' : (df_lowdin_long_x, coefficients_symmetrize_long , maxlen_long ), + 'mr2021' : (df_pure, coefficients_symmetrize_MR2021, maxlen_MR2021 )} return models_dict @@ 
-113,25 +123,32 @@ def get_model(arg): - 'mr2021': Method from Margraf & Reuter 2021. Returns: - tuple: (density_fitting_function, symmetrization_function) pair. + tuple: (density_fitting_function, symmetrization_function, maxlen_function). - density_fitting_function (callable): Function performing density fitting. Args: mol (pyscf Mole): Molecule object. dm (numpy ndarray): Density matrix (2D). auxbasis (str or dict): Auxiliary basis set. Returns: - c (numpy ndarray or list): Density fitting coefficients (1D). + numpy ndarray or list: Density fitting coefficients (1D). - symmetrization_function (callable): Function for symmetrizing coefficients. Args: + maxlen (int): Maximum feature length. c (numpy ndarray): Density fitting coefficients (1D). - mol (pyscf Mole): Molecule object. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. ao_len (dict): Basis set sizes per element. M (dict): Metric matrices per element (2D numpy ndarrays). - atom_types (list): All element types in dataset. + only_i (list[int]): List of atom indices to use. + Returns: + numpy ndarray: Symmetrized atomic feature vectors. + - maxlen_function (callable): Function computing max. feature size. + Args: + idx (dict): Pair indices per element. + elements (list[str]): Elements for which representation is computed. Returns: - v (list or numpy ndarray): Symmetrized atomic feature vectors. + int: Maximum feature length. Raises: RuntimeError: If model name is not recognized. @@ -142,7 +159,7 @@ def get_model(arg): return models_dict[arg] -def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): +def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, ao_len, _M, only_i): """Symmetrizes density fitting coefficients using MR2021 method. Reference: @@ -151,81 +168,76 @@ def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): Nat. Commun. 
12, 344 (2021), doi:10.1038/s41467-020-20471-y. Args: + maxlen (int): Maximum feature length. c (numpy ndarray): Concatenated density fitting coefficients. - mol (pyscf Mole): pyscf Mole object. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. ao_len (dict): Basis set sizes per element. _M: Unused (for interface compatibility). - _: Unused (for interface compatibility). + only_i (list[int]): List of atom indices to use. Returns: - list: Symmetrized vectors for each atom. + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. """ - v = [] - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v.append(sym.vectorize_c_MR2021(idx[q], ao[q], c[i0:i0+n])) - i0 += n + if only_i is not None and len(only_i)>0: + atoms = np.array(atoms)[only_i] + v = np.zeros((len(atoms), maxlen)) + for iat, (q, ao_slice) in enumerate(slice_generator(atoms, inc=lambda q: ao_len[q])): + vi = sym.vectorize_c_MR2021(idx[q], ao[q], c[ao_slice]) + v[iat,:len(vi)] = vi return v -def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): +def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, ao_len, M, only_i): """Symmetrizes coefficients for each atom. For each atom, use contributions from the said atom. Args: + maxlen (int): Maximum feature length. c (numpy ndarray): Density fitting coefficients. - mol (pyscf Mole): pyscf Mole object. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. ao_len (dict): Basis set sizes per element. M (dict): Metric matrices per element. - _: Unused (for interface compatibility). + only_i (list[int]): List of atom indices to use. Returns: numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. 
""" - v = [] - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v.append(M[q] @ sym.vectorize_c_short(idx[q], ao[q], c[i0:i0+n])) - i0 += n - maxlen = sum([len(v) for v in idx.values()]) - v = np.array([np.pad(x, (0, maxlen-len(x)), constant_values=0) for x in v]) + if only_i is not None and len(only_i)>0: + atoms = np.array(atoms)[only_i] + v = np.zeros((len(atoms), maxlen)) + for iat, (q, ao_slice) in enumerate(slice_generator(atoms, inc=lambda q: ao_len[q])): + v[iat,:len(idx[q])] = M[q] @ sym.vectorize_c_short(idx[q], ao[q], c[ao_slice]) return v -def coefficients_symmetrize_long(c_df, mol, idx, ao, ao_len, M, atom_types): +def coefficients_symmetrize_long(maxlen, c_df, atoms, idx, ao, ao_len, M, _): """Symmetrizes coefficients for long Löwdin models. For each atom, use contributions from the said atom as well as all other atoms. Args: + maxlen (int): Maximum feature length. c_df (list): List of coefficient arrays per atom. - mol (pyscf Mole): pyscf Mole object. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. ao_len (dict): Basis set sizes per element. M (dict): Metric matrices per element. - atom_types (list): All element types in dataset. + _: Unused (for interface compatibility). Returns: - list: Symmetrized vectors (numpy ndarrays) for each atom. + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. 
""" - vectors = [] - for c_a in c_df: - v_atom = {q: np.zeros(len(idx[q])) for q in atom_types} - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v_atom[q] += M[q] @ sym.vectorize_c_short(idx[q], ao[q], c_a[i0:i0+n]) - i0 += n - v_a = np.hstack([v_atom[q] for q in atom_types]) - vectors.append(v_a) + vectors = np.zeros((len(c_df), maxlen)) + feature_slice = dict(slice_generator(idx.keys(), inc=lambda q: len(idx[q]))) + for iat, c_a in enumerate(c_df): + for q, ao_slice in slice_generator(atoms, inc=lambda q: ao_len[q]): + vectors[iat,feature_slice[q]] += M[q] @ sym.vectorize_c_short(idx[q], ao[q], c_a[ao_slice]) return vectors diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index cd27ce14..2c76cd7a 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -62,7 +62,7 @@ def store_pair_indices(ao): ao (dict): Angular momentum info with 'l' and 'm' keys. Returns: - list: List of [i, j] index pairs with matching (l, m). + numpy ndarray: [i, j] index pairs with matching (l, m). """ idx = [] for i, [li, mi] in enumerate(zip(ao['l'], ao['m'], strict=True)): @@ -70,7 +70,7 @@ def store_pair_indices(ao): if (li!=lj) or (mi!=mj): continue idx.append([i, j]) - return idx + return np.array(idx) def store_pair_indices_short(ao, ao_start): @@ -84,7 +84,7 @@ def store_pair_indices_short(ao, ao_start): ao_start (list): Starting indices for each angular momentum shell. Returns: - list: List of [i, j] index pairs for m=0 components with matching L. + numpy ndarray: [i, j] index pairs for m=0 components with matching L. """ idx = [] for i in ao_start: @@ -94,7 +94,7 @@ def store_pair_indices_short(ao, ao_start): if li!=lj: continue idx.append([i, j]) - return idx + return np.array(idx) def metric_matrix(q, idx, ao, S): @@ -106,7 +106,7 @@ def metric_matrix(q, idx, ao, S): Args: q (str): Element symbol key for angular momentum info. - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. 
ao (dict): Angular momentum info dict with nested structure ao[q]. S (numpy ndarray): Overlap matrix. @@ -133,7 +133,7 @@ def metric_matrix_short(idx, ao, S): """Computes metric matrix for symmetrization of short-format coefficients. Args: - idx (list): List of basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info. S (numpy ndarray): Overlap matrix. @@ -160,7 +160,7 @@ def vectorize_c(idx, c): Creates rotationally invariant representation from coefficient products. Args: - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. c (numpy ndarray): 1D array of coefficients. Returns: @@ -184,14 +184,14 @@ def vectorize_c_MR2021(idx_pair, ao, c): within each angular momentum shell. Args: - idx_pair (list): List of [i, j] basis function pair indices. + idx_pair (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info with 'l' and 'm' keys. c (numpy ndarray): 1D array of density fitting coefficients. Returns: numpy ndarray: 1D array of contracted coefficient norms per shell. """ - idx = sorted(set(np.array(idx_pair)[:,0])) + idx = np.unique(idx_pair[:,0]) v = np.zeros(len(idx)) for p,i in enumerate(idx): l = ao['l'][i] @@ -206,7 +206,7 @@ def vectorize_c_short(idx, ao, c): Computes representation by contracting coefficient vectors of angular momentum shells. Args: - idx (list): List of [i, j] basis function pair indices (shell starts). + idx (numpy ndarray): [i, j] basis function pair indices (shell starts). ao (dict): Angular momentum info with 'l' and 'm' keys. c (numpy ndarray): 1D array of density fitting coefficients. @@ -231,7 +231,7 @@ def store_pair_indices_z(ao): ao (dict): Angular momentum info with 'l' and 'm' keys. Returns: - list: List of [i, j] index pairs with |m_i| = |m_j|. + numpy ndarray: [i, j] index pairs with |m_i| = |m_j|. 
""" idx = [] for i, mi in enumerate(ao['m']): @@ -239,7 +239,7 @@ def store_pair_indices_z(ao): if abs(mi)!=abs(mj): continue idx.append([i,j]) - return idx + return np.array(idx) def store_pair_indices_z_only0(ao): @@ -251,7 +251,7 @@ def store_pair_indices_z_only0(ao): ao (dict): Angular momentum info with 'l' and 'm' keys. Returns: - list: List of [i, j] index pairs where both m_i = m_j = 0.""" + numpy ndarray: [i, j] index pairs where both m_i = m_j = 0.""" idx = [] for i, mi in enumerate(ao['m']): if mi!=0: @@ -260,7 +260,7 @@ def store_pair_indices_z_only0(ao): if mj!=0: continue idx.append([i,j]) - return idx + return np.array(idx) def metric_matrix_z(idx, ao, S): @@ -271,7 +271,7 @@ def metric_matrix_z(idx, ao, S): numbers satisfy m_i=m_j AND m_i1=m_j1, or m_i=-m_j AND m_i1=-m_j1. Args: - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info with 'l' and 'm' keys. S (numpy ndarray): Overlap matrix. 
diff --git a/qstack/tools.py b/qstack/tools.py index ef0006df..1ad801e1 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -6,6 +6,8 @@ import time import resource import argparse +import itertools +import numpy as np def unix_time_decorator(func): @@ -97,3 +99,11 @@ def remove_argument(self, arg): if (opts and opts[0] == arg) or group_action.dest == arg: action._group_actions.remove(group_action) return + + +def slice_generator(iterable, inc=lambda x: x, initial=0): + func = func=lambda total, elem: total+inc(elem) + starts = itertools.accumulate(iterable, func=func, initial=initial) + starts_ends = itertools.pairwise(starts) + for elem, (start, end) in zip(iterable, starts_ends): + yield elem, np.s_[start:end] diff --git a/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy b/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy new file mode 100644 index 0000000000000000000000000000000000000000..0360a409ac89ec356ac81df8a5e92658eb8c3dc3 GIT binary patch literal 751 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1J|1@7uwHaQhwpx@zH*R-NEG!d+ypxuF$z>?9gGa ze}(_%jnjPgYc`1(EOaulpSCA1X6fPx`-V=zeP14>*#|sdyin<2slEF%iTl^LxY;L& zJ#emwzhrlXfp3OzinINqINS37tIO^8H+f4y_qleK0ODJ#xLqV$3*@IQ}nCc;8IeTlazYEyCQ|*P1MH*dH zEVX}FsxGbDu4Ui3JF1;sgU#OexWW8g(w6q(m9x9V0{HF4SFD>IWBJFf;f06gdi{G~ zn=<4vLmOt#)rp6}_C#Lz2ewD`dNA0YbU7)oJu3y{!1la+tpT>@N?Q@wp5- Date: Sun, 9 Nov 2025 17:48:22 +0100 Subject: [PATCH 17/23] Fix SAD-diff - fix SAD DM for open-shell - fix tests so they test SAD-diff --- qstack/spahm/rho/dmb_rep_atom.py | 2 ++ tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy | Bin 10405 -> 8971 bytes tests/data/SPAHM_a_H2O/X_H2O_SAD.npy | Bin 5434 -> 4696 bytes tests/test_spahm_a.py | 6 ++---- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index 5f502383..30413e10 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -58,6 +58,8 @@ def df_sad_diff(mol, dm, auxbasis, **kwargs): """Density fitting on difference from 
superposition of atomic densities (SAD).""" mf = pyscf.scf.RHF(mol) dm_sad = mf.init_guess_by_atom(mol) + if dm_sad.ndim==3: + dm_sad = dm_sad.sum(axis=0) dm = dm - dm_sad return fields.decomposition.decompose(mol, dm, auxbasis)[1] diff --git a/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy b/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy index 311909ec2534763a7fe77df0636f9f0569ef38e3..75a20eb4e0ef2b9ad206ec57347bf7eae887998f 100644 GIT binary patch literal 8971 zcmeHLc{J5)7d9M`CPQ3PIwVSFisGJq?o|>pX3UV|;56W5p2~0|N>QCi-GrNvG`P1x z$xotDrAUetSEeFm%uru`XS-{C-&*&d@2`8;@>}bicfEV>_c_mg_Is?yIbvzQ(b}1p zCzK~jh3OsS6{xa?suFFbqDEEm2?z`h^z`=#2xNLsg-twn1bKgt2eCZa-rx7jbk(Rj z%hjm6sQ>>X97{PGtH?t>DU$vne(VSh53hhgZwNg{DXo6 zL%f1HM>r`0WF;p>(4WaIIl?jLN#GdsGB|rVwwz_=yypDoe2E+ue~JJzID+jxb%@W( zfRmiW&GV=5MsqAJEzQ1tf5G<<}G8M1t*XZ3)8 z+QC_iuQh}1>u|5+@u$#XqE>C7@f56+>UdOEJcd{W_#oz02{goM+_NNHENgP{Q6MS=EEoy z)7P!9d@=%tMohE3#__sgv5pSB;;7*m~~VM>DzeDnOkTQJ-+^D-`)W zLY#Z1krQHdE#RZ+_#<7U2@J01J<8l&3u1VG>~ZcZab90hXJ6EZuZ7!}1M-oc#)}X(h{KWbTi%qa6J?E#@~SF|gyI%% zedX=6XSPG4V5_@aTJjVhJlvjpp8ZUduqrI}QuZ??&XBz%O??rcBk-Ay8FffofCTcFLq0dWxcDVl&$o8Rp5z!G z5K@VZ$vD6S)6HUKbQx!O7s2f}75TTRbNQqr{#RvgJw@ieQ^_3$?2UaDcec*~98sT( z*za!SuZ?_c5I+R#-B^N2OZhCoLXq9tRj2@F0^9y@U1SKc?;EP#_R}CNL|H7{R}Ul= z2a+nJ<-rE?s^dJ5s-J9vq^QfarwK=BD$+faZEFR@>Q6^?W%S_yp5Kr8iTpvv8R^ZC zRhZ|#;*AZ=FD@;-;JOBU5x)h`Pv^6g1rG%rUJt)K5{`6dXoD(#eg)z$#PbQ5m)qL3 zyE{)B)UDElHfRmg$`sAD3X5vWAK*F>lHrG<-lW{7n(mJBC1}AgZ1tK%rM_2~J9h2+`*lKK4)C&riQ}$zhEjWf&~_X)+3< zy|Ah?UiiFpKa?%DIqAsg1>WsV^GENu!Y$+@hdBAY&z}}-6r)R}(AkV{67;l-(LXP2 z5vA+Lw!JK%Qs~VloG-t5k3;+`_nH?0lkjHV>YU5XV_;$0c}&*09~!Z*O5`JhI6Hz% zH75gP=usIJ9McF1`m7!w>S-+@dZndm#>r3~`WAM5*tUzj^aA0rhD}O5^oa#We{|bF z0Gg^b#-t_o^}?_A z`}hRgTEP?NWr{ja>oeV#HS!TgoI#ehh>)m z-_vm!a?m2~bo$J8+PZ`w*xdT1)SaT;TB2}%RB=S>8XvempjB>9(If($;BJYv5wZ1( zv5$zhAyHJDInu1GMTFOJ@zaL5^=~CYOMYey(sC;+LTTy>gwqQvhT3xt!ey}Z$B`52 z1W>C2cBLy2`N*dN@lRv@uIHBD9(}d|A!TN;#kezM7#_=YA8-P;fyzDg(;J}y`O}b3 zYXcX5U@5m=_K5z)92;L)yX6MEg6RXzOxd*E$6Y`Q`wc|?2FND`@mFEJQgv4Jx;SrO 
z8mvu+6lZWpeLApT7VVWxN32J}y)9zCPu z1jA#A2YxCsh0zeBoOaozV1W6woZ)3-wIzQx`e9y{Ubvq?_&ZBKZk(>G?CL<1u{ ze;V_D$nXXkdFl_^jQOg#Jii z%O0>BUAYxnu|5vt_PZ8DX0q5Yyghf3)D?G7Mw~FLAH{g6QH=l5n--uK;PNW>$}$)f z;d>-nt_$%{on3TJ_piO;K-JZG`vJ}zqV6;g0N8{1eB3v?iL>(T0BoNPUh7*jLW}PG zQ116^lC~RHgOU^Ly4Lao_aC5{_ZbCd6qr%qpR2&?c?Gj}d4xm5Ny~+olwu)h_6=RZ z{yFrxq}Fy*3Ii~r`=YV^<*d$}HZek#{P^S3`BJ+!%IC+VT*Vcbw)g!Is2H{wVS zy-wyZ>7fO%UKHa#!D6I`-VuM6^w5@wV}|wq7^i9^lOB4)*pc+m;W=MxrabiJmA6O_ zJ-uH^DJMNNt5}Nk&{r`(y&p>uCO!1WsE?$FCTQ7gCQ za!3!o4RID?y)(w!fAJ(e^n{rk>7k{p`$-SYXq-r$^3c=!S;w4758ab7OnPW0=HKH! tA%;eJ=n^4Cj7tT}FpEFN& zzv}=0pIDf{Hs63vR4@*iOekyLk9u%Cw*Q`nK8Mx)u#nIgxcjxt4U znPS?O0*-vY)f0n>vO1K?!W=__o;x^1FUNkT2K~uecJw?)%DfR0q zR*8I(%xPX&*OSha{;exx%aonaHDb#Bzq&LQQ{L^ENw)-z_U_B~^4L~gDBs;?x71-7 z(92pRjA|IrZ(;c4aOPs*l67~vuA6Q0X21=2?q9Ra63kRQ4oy`z1qR7~N!Cr4=!*<< z*Mhlh(!-nQR>4oAV@mSZko7gM4dV(=GoUeK={FmnJTz?RvifRdJ^Os2OWxMhFKpYz z_a{rFQ}5rh$)#DU{O^2Y{X(mrQ9>DvN~V7{rR zjPDCGT*LeMd#wc+?bj~|>)soHfA(C=5HN0stYG>58-zN*_eO0XZ+aW7x|`QNcCa4e zh#y`s)-el_TU|aT#mEY;hWHz@jBJK@bMG}NhW(BUb*WVX45RJ?wJL9hAWZ*+&Ay|< z;FEoQmet1}(A}eGnbG+DPhCO8&n==;-Rh<~Q&x!~miq6CQjlfPvOy}OQJB#YAQ1+?9G?rCIR>7>sS{n00N@vp~z5Yd_9 zc2{fe${n?SHguf{2nHd_otuCU5U)gjPyB)^k~(5b>H!czYMNGbzIkb7rgm743{j9^oMDjP-*jX;}(3JuujpaNTX`@Y2wF#=u4F1 ze2YfVf{+L`D~lO>nj}X zp;EYZcS425vS?s5W=)WfUb*oL>?knqo$Xl;HsUyOWtBU(m zJq6b*@*VqM(}w+=DZu)YRXCq3{7u!XtP!m$^S-{PM-D7fJ35-X7eEZm=)SvQAy`~6 zZCK;20*=R*+8tihjh2)6j?lQd-;B(l{#JZ%PKO1!rNim^{RR-(&~&!snGl>H`x{97 z&W>Ze1nadhd$ar@S}7g&$t<;*JN&|?bs!@* zm6!Iu9EQcq?3DG&;O5JXx%rJRK}WYiW9@V{6d%ModGnrbO7tE8SF3e%mt+YrCY1;E zK1&m3RR64v8F3R}&=jOp^c)7@_}9vsRlcpT^r4FUqLQz`eKA?z@7QP9C*TQ%`R_oE z_;DdRbqAhw)yhm`Oslh>ES)dQu+@2?lv*yuP?H^+zU7htqd)al!+w_^V4JukdUkI= zbl8`q{p{<3eMRv`LZ6%A1gVSn8vBtXI#$g`xBE?3VlZ6GG!?yM8J(A6B5h9!Gq?o; z&#uUfLiVQZ%MIoZfpDzCiIZmrps@WPrifxITqgZ_N9vmJgNRP_9D3G{SSiNCm_ya> z>jW4*b3F6b3H^Ws(Y3=WSNq|JjD6FxJ3YXvG|pcf*9=Oe&lCNTBXx}sKbb_QrPuA? 
zTay9!cmC{+i#D_ZYyE}EM{~Zyle%dabM}4)+Ytc`we)wOOy=cB`rJ3??nrWp;-^Vhg|ah93gZ6O6KK4`n-?y$AHvDFTs91iOvk=H(H6RxiG~+ zAg<5qJ$hsvdiLV}K9v2;y<(5!Th#J+hEh0NkVAX7e7VAAInGoQTbYwHML7HND$X2i z`^*j?`E#uFzKU8&0yjhm&9Ci2yusoJQNPZeQ~q$rPeF*&YxkvuW=ONu zHt5^24k{lcm_$!9fUP9{$z0`3r@9~f^t4M6-WveC><>OlJa2Hz-;%@lX7y+PMBF9X z)wVV^6dHsx$_*@;a4ob@Kkl0yoFe)Y`x{C8K$>(_+_*dFa4Yi18Rk$#p6^NY)5-pM zByRMfUGc2;JXm+GRUpy%EpmDp!$qvRKi96&k80(HpAl1m#Z+{oaLSyiG=Wpof8&(0 zEmLIzr)GG`Vm_kZ={w$c*cBE<^|M#fcY&_5(Tc|f+rg)=wO~Q+CYVpw9R%$P%4>sx zuKSK%!xsQw?6b;a(ze4QlE0L!=gt#RN|a(kL5pH>)1qyEbec*2#QImwdPlyb^)TO2 zy5zy=6nHcoU!xPf%Cp6x56QpvQ^+gh13fW+KW6$cGERI}t?v8< zHR>Iv`olT8j;d>YD)tlQq(td7_M`=W^9w|Gu3KJ-61<9xvr0e#G3d&OXDq(jDs+ z;I1Cy+v9LexRzq()K;wuG9K=0U-65Bk362c`w2WRot3!HneT9aa;M|E`kkz8{H=e>je*p|VRu=m_rT}y!{QjgL!E-x%fS7bjR;c#8N!>S~4&52;AqrGPpnfq%O$<>_>^{IFvU8 zB#exq?A~p6gy&?TAkj|2do2Y>GeAkZP(BSAPUU{!oTy(LQ-&rf=$2{3WTVR$?D!WfZ)3l=?TS4i-^;#WvE+=;xwq`F z``hjXZ_7p!EUYgfkMkLJ4m0DMFCndEdaq(nSFn{+qz)~ZTg^^Z8Si(!_n58gtSf32 za|!7YKT$;An&ej=6Vj{LAq2`!sG_oE3e>j3ySu)zBdUjA%CzqdDx{6=}5jd@NDfx zr=E`;9H{I>jz{Vk?Gh!Z*f(v~&ioX%7KtzF(i<&UEDFMxel|`%ISC#aRP@f2X+j10 z>B$n|57}{KzX*xD>b0k6zMz5s(vwn~Zhu6dl2ZmpYVNT|BeDKtvVSj$Ka;dxoNN9F zRhfqe-QF0>{*OFAp6GMQ{I3&z-;L&fjzA@455)T>HN|cbGF-OjS3pKV+o#n~cK$?px866JBWTec+WOSV5hm&WyAnl4UPMQUR_xjG|gG8O@(Fl_I0#&+8bwB<-}h2vw1C9N$US}OAVo$AhL0#RdOp3C zBBLU59n8B-k&&NM3`IsQ4~;1@%Gn%Ekx_Q>dy0%ij^QtVqaggn-W1qHk;(BBS7Wm>_b3Fd3O_#b4}o>ZKGJaoXNeWW)={ zb#+RXQlwVrh;?W-n2cPHOs9yZWD_Qlog(B*nP-NuiYDJNeg0=%iMk4yRC^C|lI7pF^dCvoijOZ@7&#QZJf7&jS+{Rkh# zI)kB@jE4LP8O>Qmk&$nmCq+iur|{g%i~N!FD`%RTn%jnnDiM=j;8rKd(q#=EM*00M?P~Yo^F3+TNWaqcB-aM*UYY zxd=I7GICQlrpTxup^+k^HA?3wGMYv5Uj$(?iV?@8bBmD6M;0a{Nkb!wjH2#3P-Ik| ziuIGlaDG@2CL_8tCL``BCYMt>0t3IvC^>v4MMgB@CxPg1Ao)j%Fd4mT#$zDI4G~)l>2JC0l8|zD0<9u!B2#Smti!d4WN@FtmsKlel zD4y>VMMjez#8YI{PU5Th^(iunsK;cKyQGRDqb(oKQDjt4_D{so8B9h@&p3*VmgvM$ zWQ2(RW3oSh#1AcZpvY)mhCf9{66EeRS8aq721 zT|g>|7fO-x68bJOyd3aw1W7Ov&Eq97g%U{zHTJ%Gs#E{;SAX!Gv+q0a^E~fwzvuV7 
z-*4Lb$e{A64_!0FxgwdXKgYEo*42aKx-3T~m!)N=&t$ZXTR9od14VC`~l2)|By5*=FZuNfllxq8ypnD=TlMd_|f}mZqRp>=c<; zl#`t+m*t7%^mbZhYOJJH=GhszWIG*A719eB>2xt2PjjOg(X41@5iMbifO?%|%vl;)u>RX^IproocL!zI??l!c4 zMEYvXhhu|ro?34E&dMqRrd`!vx4E(aRu6Xk>=P0Kyb<9p7ZXRMJo27&BrX7@7XBdq z(e-UKZ3-u`y(bxYw7lAWb8RF_eM0*3#(ZXVU>u8QD_5-*@<8{c_koEGJg6>rv(~bw zp|&cCNc_FV7mRbN?2skKnh7#=gqEF`w^W!8n&FACoy{GO&1ZyUk-?3JhPs=Wvz_ zK0kG>cer4CvxVY^TCn{g3&Of9vu2^>S{oycJ4`+q@L&m;bZ2B@muEM`jXR6AN1W_4(*b% zJZEhllx8XWc$zFYwW6D`_)roYB>bQA=_B}Eq+WRE*rm3TjS!9oTyCFU4gIt4JD9Ro zKtJiXobazCeAEO#l+xP%6V?Idty!_HuBEf*!Izo#gc?t0IGy3OIPNHgPLI6P)X6v_6#!;S|B2 zMD`yd`O?bkmS3@xpgJILYgKs=nC-yd-$n3!$^Jbg-}KV!8otC4hD_Jp<88Nx^X9x%$*2+V} z0S=OU99c(AZa5_Ra4r1-5LP7HruaB>#L} tF?`Hu**C4QWj`4YUb zoNb){|0ka$6&Mk-IV#>XFp?K+x;ZACAGV&yTOTj~GcPzWG9sGKiwWe*Gc3rGj|hS& zBjL*HzPw*gZS}L!Sal(k0gu5a^o^`>1lhI$?lB2 zv9pwq#Fw>1M#0t9)oJ3(5C1tMKBE|r?#E0q#&I`hYA7ode`s3hm#-+fGs<7T@*tI& z9tyG{Tt?-qtg1Vs_D$A?QU70A7N21Uq}#>Guvm{@^)_U7uS5-zJs#FxzKC1jrD)UQ zkA|JBZtObkjcUkxkovomJ?s!v6LW{xvS<@>m>IltnqDBHN&Y>uZnw}v?sH8Pa#Sn0 zd@^?f`b2c>N&X75zT$yZ&h>16)RAEQ#x1;s`fRms`5&ob!J+HxuBEh_qB|9 zdH=mR=woX_)gkx!sFcJ<$oslY+8cQ1ZIH!X@%Vu5S!$%(lC57+Ea)Iljr&w$K@l5W zy`6OE#jY2q(d$1l`)ow0N};G&_Q7OYBuWarOS+fgj^pDZMmPy$v(r4n?+q`9_&=Dt|#)@ai)633Z=^Ni6MmfZf zI39Ey64aMBjGNPElAF=t6P9#2tnLsbzLNW6o}5{RbBaixtY^X`kS=>j@GBA zsYshlcxuv?C5^Q`(pofK)cE6#P&VzPeO)HmM}amZeI6zKSxM@WBYqwbojcthcN}`A zM$c4Wm%WWuq{W`U$y{ALh4v{l_@}`|o<4MU2iyFr7&(x6$&o%Ak^Z!ix+IAob)uun z`K3jeCP{P0N4J{GOVJfZoHO|x3EHP5nEh5_7_lN@?j>YirKHc_`eQ-rl3WFTD55hp z;JlH^0V!J3_ZW+tCqbM2eSWAca~N5W^TQ`|k0kRlAbnm&`m>hQb(Z+iAUb)^y|WJ< zpFpL{=HHyPZ3yL;z&Xq#=VzGAU5m`if%Lh6^ygb$f_vcSDABQMij`2$e}&Q`HoU#k zI86PSm$)+I$#mq{+V$6VmG@MHkjXPX}-1$O3@3;-&9eTo6gI6h4{Lg~Z zB;Repnu#@v=14pq>r!Q>jIJN-+LAZVPMGq$Snf=LyU@_y-X>MmR;W77FVb8)mr0!AwNt{NUu91RC#70OjL2qop90>CiPEFOSSnV$R>W~6Me-x z$afM+IxG$GK+5!_y-jU_=-zOznJd>F752_NuAM#`C9=W)Q{spA1^q?@`2x4t<4b(_ zs8RN5WL{YSD!W!Yb(^6z>h^+qe<1$T)WFXm(SJblTbJ7fT-5#v$vp~*Nl^p0# 
z^Hiu;t{42T=m9^!RD-@!6Xe&(zTt${v(U2ou;bB#R>-ON?c2@)HX7&!0kft>Q~ z+gF4wMm~G3J$6|tp~WP=F)_#FjZFx8e(La0#oLW2pcG|4-{y%@+g}{4y={p0ll|=^ zer{vBIL|c-&05Re$*S`~TUD*y^nFZ_BGC^e`@h9)R}1>v-`S!1-A0u;m1@YHd_Rxq zA0hjjNnAm5U+mqI0jfV*k;p}_JkK;_lSI&{sI;xzFs=G9Ay|7IFuG2~KzpcMEeC{Jr z5QFCWsMKB(nW85%I>O)TnxnfZQ%-(r)I}}iUD&RpBzkXw6fVZpe!ixUYDoOs`{uEs z=IkS?X!fS7D>CdQ5aPVv-r)9{vLVkuLk@Ml$h}W({4KNbvI`4dVOEQ<*C9gWk?7)PC`)h{VyS$8I8`(C{#J2jHWy2pVac^pi>F8l`eb@bc@^6ko2n*YSe|g zri;PPt_aXE*yq>R#q?90Ud@9234c*PCx(yCTlJ3GKcqcN0jVJNqbQ9l^}5Jho)^WG zY9ZUBx106+q)~_YF5DmCo}X}CtBS!-ny(ht=@=}De-m|xI(OXp_cZC7)Qi_1eYWe~ zP-@yI#-Vs3QX^xX%N1&>x3+P?u;6_!-y>IwcOeE&Fr^ zsHlM8gvxpql<8;IQs$wDDi?*i1Z|#yytE1>3q;cBjugzRe-ZRKgY;+F0MwN>0)FI! zKxeB}PTj^;Ty)-?ese9^0UaxH@a<`)k!oR<%ix#M?63$^q5S*VBYnb~}GB3vz=<^m;=ns$7rBn@m zw1|#ZLwii#*i2MD6nIW{_I)aDa=-M2t~SarM$7m*`xa$2t>&3<|ENIoKE2)Gh=g!` zYFl&X=T1TAmZH;2-8ZQpNPffJ%Q;sn`l(48X7vUcP1KQm56O9p<%Lh&-(~J+PZ#Dp zE&L@s_k$qmV&H}Nz$QwO5BiF1$frHLbPjh`QAUf*?`LKY3bc!qcg{2UBq*FYG3B~m=IEp(jGL~?%8CMn{N0w+>$hBNp#(*Zx~J)qxo8uHUj zGpeOGiK&cVZ~7ihYZd$+5B;Gkpx&sp;Q!7N@Uzbs^hHM@pEfDoQh=%`9&$RcYn_NP zReCT#qjTPb&W=<678Z?+wKNJusi1$2?B7D- zx0PHMR60JRnjDkk&aOTq_?vwHFww6e`*W5-yh6UEZk|#mbvA7Bu(-LC(k@?8KC%1B z_vgPCYKBq{|4lJEe1;Pc`W;ZZUje1(&gc_B8GHj&>oowB-dOU_j+rRbfEgnbC~_b(1_tt{lODit^3Cfa1yP;%*T0o%mE#rY!k9R}3q%vcPlOf9%zd96*=Ch|@K<)W3fdSQA2Y&MEKyk z2QKo8H~^I0dbl`S_3AL7gguWipv1{gSHDsn24<@d=&;-XQ2x7TV6fD#0U&G|gNuAk z8(hF4g>Z2m@VkovwK6gp1FG+d2d-<88Te@$0UfUN2@I$xe-{iW4P!41C^?H$7*K4* z9T-rKgO@O%xa*+L%ZH#pJr+<`kQew#vIm`ZdjJ%F8-QhREC6B3JGjV`KNn#@?Pp+K z{s*AXouof{qfl4b0`Mcd19V0b0Z^kn0;t)`FrXq@LolF>vtaI}&tP8b{GreN&k3N+ zp{`Lr_?a3FI;%ziQ2np%Frem1>tH}7O@;HLt^sq`(uaA`N1@M!QqZ6J_fXdX;^z|4 zQ9QR91Ild200vZ{>}q@tbKb)F+0h4c=d6Qyu{0`hpN|hif9{jI9El(CcF<9O--!XW z&oc-EDoG6hHT(wvM8+2Y6`*H}0acyXfdREbD;EPwpXA?-13+ac0B}wbfVA-eP)b%d z7*J{Fy)d8}ib20n9`cjo08m^%091_#0Fq@Y_2Da^3X^p(pjgCD9?|zE`MYldpdNPu zpt29!VL+YsT8IJlS{M8`5I-x%Kwm)e|Fm6!0i~Y369X!7)5wos0aZrom0ke;IcDJJ zKGAQ_f_%>P91N(u<(Dv^B( Date: Sun, 9 Nov 2025 18:32:15 +0100 
Subject: [PATCH 18/23] Make all the models work with only_i --- qstack/spahm/rho/Dmatrix.py | 4 ++-- qstack/spahm/rho/atomic_density.py | 5 +--- qstack/spahm/rho/dmb_rep_atom.py | 38 +++++++++++++++++------------- qstack/spahm/rho/sym.py | 21 +++++++++++++++++ qstack/tools.py | 2 +- tests/test_spahm_a.py | 15 ++++++++++++ 6 files changed, 61 insertions(+), 24 deletions(-) diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index 34a9c74a..dba38772 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -20,8 +20,8 @@ def c_split(mol, c): """ cs = [] i0 = 0 - for at in mol.aoslice_by_atom(): - for b in range(at[0], at[1]): + for at0, at1 in mol.aoslice_by_atom()[:,:2]: + for b in range(at0, at1): l = mol.bas_angular(b) msize = 2*l+1 for _n in range(mol.bas_nctr(b)): diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index 1f97e27f..78f0b97b 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -57,13 +57,10 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): a_dfs.append(c_a) if short: - cc = [] if only_i is not None and len(only_i) > 0: aoslice_by_atom = auxmol.aoslice_by_atom()[only_i,2:] else: aoslice_by_atom = auxmol.aoslice_by_atom()[:,2:] - for i, c in zip(aoslice_by_atom, a_dfs, strict=True): - cc.append(c[i[0]:i[1]]) - return np.hstack(cc) + return [c[i0:i1] for (i0, i1), c in zip(aoslice_by_atom, a_dfs, strict=True)] return a_dfs diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index 30413e10..c0631288 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -50,18 +50,20 @@ def _make_models_dict(): Returns: dict: Mapping model names to (density_fitting_function, symmetrization_function, maxlen_function). 
""" - def df_pure(mol, dm, auxbasis, **kwargs): + def df_pure(mol, dm, auxbasis, only_i): """Pure density fitting without modifications.""" - return fields.decomposition.decompose(mol, dm, auxbasis)[1] + auxmol, c = fields.decomposition.decompose(mol, dm, auxbasis) + return sym.c_split_atom(auxmol, c, only_i=only_i) - def df_sad_diff(mol, dm, auxbasis, **kwargs): + def df_sad_diff(mol, dm, auxbasis, only_i=None): """Density fitting on difference from superposition of atomic densities (SAD).""" mf = pyscf.scf.RHF(mol) dm_sad = mf.init_guess_by_atom(mol) if dm_sad.ndim==3: dm_sad = dm_sad.sum(axis=0) dm = dm - dm_sad - return fields.decomposition.decompose(mol, dm, auxbasis)[1] + auxmol, c = fields.decomposition.decompose(mol, dm, auxbasis) + return sym.c_split_atom(auxmol, c, only_i=only_i) def df_lowdin_long(mol, dm, auxbasis, only_i=None): """Löwdin partitioning with block-diagonal slicing with contributions from other elements.""" @@ -79,7 +81,7 @@ def df_lowdin_short_x(mol, dm, auxbasis, only_i=None): """Löwdin partitioning.""" return atomic_density.fit(mol, dm, auxbasis, short=True, w_slicing=False, only_i=only_i) - def df_occup(mol, dm, auxbasis): + def df_occup(mol, dm, auxbasis, only_i=None): """Pure density fitting with preserving atom charges.""" L = lowdin.Lowdin_split(mol, dm) diag = np.diag(L.dmL) @@ -88,7 +90,8 @@ def df_occup(mol, dm, auxbasis): eri2c, eri3c = fields.decomposition.get_integrals(mol, auxmol)[1:] c0 = fields.decomposition.get_coeff(dm, eri2c, eri3c) c = fields.decomposition.correct_N_atomic(auxmol, Q, c0, metric=eri2c) - return c + return sym.c_split_atom(auxmol, c, only_i=only_i) + def maxlen_long(idx, _): return sum(len(v) for v in idx.values()) @@ -131,8 +134,9 @@ def get_model(arg): mol (pyscf Mole): Molecule object. dm (numpy ndarray): Density matrix (2D). auxbasis (str or dict): Auxiliary basis set. + only_i (list[int]): List of atom indices to use. Returns: - numpy ndarray or list: Density fitting coefficients (1D). 
+ list: Density fitting coefficients per atom (1D numpy ndarrays). - symmetrization_function (callable): Function for symmetrizing coefficients. Args: maxlen (int): Maximum feature length. @@ -161,7 +165,7 @@ def get_model(arg): return models_dict[arg] -def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, ao_len, _M, only_i): +def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, _, _M, only_i): """Symmetrizes density fitting coefficients using MR2021 method. Reference: @@ -171,11 +175,11 @@ def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, ao_len, _M, only_i Args: maxlen (int): Maximum feature length. - c (numpy ndarray): Concatenated density fitting coefficients. + c (list): List of coefficient arrays per atom. atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. - ao_len (dict): Basis set sizes per element. + _: Unused (for interface compatibility). _M: Unused (for interface compatibility). only_i (list[int]): List of atom indices to use. @@ -185,24 +189,24 @@ def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, ao_len, _M, only_i if only_i is not None and len(only_i)>0: atoms = np.array(atoms)[only_i] v = np.zeros((len(atoms), maxlen)) - for iat, (q, ao_slice) in enumerate(slice_generator(atoms, inc=lambda q: ao_len[q])): - vi = sym.vectorize_c_MR2021(idx[q], ao[q], c[ao_slice]) + for iat, (q, ci) in enumerate(zip(atoms, c, strict=True)): + vi = sym.vectorize_c_MR2021(idx[q], ao[q], ci) v[iat,:len(vi)] = vi return v -def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, ao_len, M, only_i): +def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, _, M, only_i): """Symmetrizes coefficients for each atom. For each atom, use contributions from the said atom. Args: maxlen (int): Maximum feature length. - c (numpy ndarray): Density fitting coefficients. + c (list): List of coefficient arrays per atom. 
atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. - ao_len (dict): Basis set sizes per element. + _: Unused (for interface compatibility). M (dict): Metric matrices per element. only_i (list[int]): List of atom indices to use. @@ -212,8 +216,8 @@ def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, ao_len, M, only_i): if only_i is not None and len(only_i)>0: atoms = np.array(atoms)[only_i] v = np.zeros((len(atoms), maxlen)) - for iat, (q, ao_slice) in enumerate(slice_generator(atoms, inc=lambda q: ao_len[q])): - v[iat,:len(idx[q])] = M[q] @ sym.vectorize_c_short(idx[q], ao[q], c[ao_slice]) + for iat, (q, ci) in enumerate(zip(atoms, c, strict=True)): + v[iat,:len(idx[q])] = M[q] @ sym.vectorize_c_short(idx[q], ao[q], ci) return v diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index 2c76cd7a..e42c84b0 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -6,6 +6,27 @@ from qstack.reorder import get_mrange +def c_split_atom(mol, c, only_i=None): + """Splits coefficient vector by angular momentum quantum number for each atom. + + Organizes expansion coefficients into sublists grouped by angular momentum (l) + for each atomic basis function. + + Args: + mol (pyscf Mole): pyscf Mole object. + c (numpy ndarray): 1D array of expansion coefficients. + only_i (list[int]): List of atom indices to use. + + Returns: + list: List of coefficients (numpy ndarrays) per atom. + """ + if only_i is None or len(only_i)==0: + aoslice_by_atom = mol.aoslice_by_atom()[:,2:] + else: + aoslice_by_atom = mol.aoslice_by_atom()[only_i,2:] + return [c[i0:i1] for i0, i1 in aoslice_by_atom] + + def idxl0(i, l, ao): """Returns index of basis function with same L and N quantum numbers but M=0. 
diff --git a/qstack/tools.py b/qstack/tools.py index 1ad801e1..f2d2125e 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -105,5 +105,5 @@ def slice_generator(iterable, inc=lambda x: x, initial=0): func = func=lambda total, elem: total+inc(elem) starts = itertools.accumulate(iterable, func=func, initial=initial) starts_ends = itertools.pairwise(starts) - for elem, (start, end) in zip(iterable, starts_ends): + for elem, (start, end) in zip(iterable, starts_ends, strict=True): yield elem, np.s_[start:end] diff --git a/tests/test_spahm_a.py b/tests/test_spahm_a.py index c11fe414..aeae5cb0 100755 --- a/tests/test_spahm_a.py +++ b/tests/test_spahm_a.py @@ -101,6 +101,20 @@ def test_water_single_element_short(): assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations +def test_water_single_element_SAD(): + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', + elements=["H", "O"], spin=None, with_symbols=True, + xc = 'hf', model='sad-diff', auxbasis='ccpvdzjkfit', only_z=['O']) + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! 
+ X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O_SAD.npy', allow_pickle=True) + a = X[0] + assert(X.shape == np.array(X_true[0], ndmin=2).shape) + for a_true in X_true: + if a[0] == a_true[0]: # atom type + assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + + if __name__ == '__main__': test_water() test_water_alternate() @@ -112,3 +126,4 @@ def test_water_single_element_short(): test_water_single_element() test_water_single_element_short() test_water_mr21() + test_water_single_element_SAD() From c4c592e7cb6758ff02c4b41b41344bf89efe0f2b Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sun, 9 Nov 2025 19:41:08 +0100 Subject: [PATCH 19/23] Use slice_generator --- qstack/compound.py | 34 +++++++++------- qstack/equio.py | 12 +++--- qstack/mathutils/array.py | 7 ++-- qstack/regression/global_kernels.py | 8 ++-- qstack/reorder.py | 58 ++++++++++++++------------- qstack/spahm/rho/compute_rho_spahm.py | 17 ++++---- qstack/tools.py | 15 ++++++- 7 files changed, 83 insertions(+), 68 deletions(-) diff --git a/qstack/compound.py b/qstack/compound.py index a0ca482f..cf32dd4c 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -341,7 +341,7 @@ def singleatom_basis_enumerator(basis): return l_per_bas, n_per_bas, ao_starts -def basis_flatten(mol, return_both=True): +def basis_flatten(mol, return_both=True, return_shells=False): """Flattens a basis set definition for AOs. 
Args: @@ -350,16 +350,20 @@ def basis_flatten(mol, return_both=True): Returns: - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: - - 0: atom index - - 1: angular momentum quantum number l - - 2: magnetic quantum number m + - 0: atom index + - 1: angular momentum quantum number l + - 2: magnetic quantum number m If return_both is True, also returns: - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: - - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian - - j: AO index - - k: radial function index (padded with zeros if necessary) + - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian + - j: AO index + - k: radial function index (padded with zeros if necessary) + If return_shells is True, also returns: + - numpy.ndarray: angular momentum quantum number for each shell + """ x = [] + L = [] y = np.zeros((3, mol.nao), dtype=int) i = 0 a = mol.bas_exps() @@ -373,11 +377,13 @@ def basis_flatten(mol, return_both=True): for c in cs.T: ac = np.array([a[bas_id], c]) x.extend([ac]*msize) - y[:2,i:i+msize*n] = np.array([[iat, l]]*msize*n).T - y[2,i:i+msize*n] = [*get_mrange(l)]*n - i += msize*n + y[:,i.add(msize*n)] = np.vstack((np.array([[iat, l]]*msize*n).T, [*get_mrange(l)]*n)) + if return_shells: + L.extend([l]*n) + + ret = [y] if return_both: - x = stack_padding(x).transpose((1,0,2)) - return y, x - else: - return y + ret.append(stack_padding(x).transpose((1,0,2))) + if return_shells: + ret.append(np.array(L)) + return ret[0] if len(ret)==1 else ret diff --git a/qstack/equio.py b/qstack/equio.py index e56a1372..3dc8f422 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -6,7 +6,8 @@ import numpy as np from pyscf import data import metatensor -from qstack.reorder import get_mrange +from qstack.tools import Cursor +from qstack.reorder import get_mrange, pyscf2gpr_l1_order from qstack.compound import singleatom_basis_enumerator @@ -26,7 +27,6 @@ _molid_name = 'mol_id' 
-_pyscf2gpr_l1_order = [1,2,0] def _get_llist(mol): @@ -123,7 +123,7 @@ def vector_to_tensormap(mol, c): nsize = blocks[(l,q)].shape[-1] cslice = c[i:i+nsize*msize].reshape(nsize,msize).T if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) - cslice = cslice[_pyscf2gpr_l1_order] + cslice = cslice[pyscf2gpr_l1_order] blocks[(l,q)][iq[q],:,:] = cslice i += msize*nsize else: @@ -132,7 +132,7 @@ def vector_to_tensormap(mol, c): msize = 2*l+1 cslice = c[i:i+msize] if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) - cslice = cslice[_pyscf2gpr_l1_order] + cslice = cslice[pyscf2gpr_l1_order] blocks[(l,q)][iq[q],:,il[l]] = cslice i += msize il[l] += 1 @@ -291,9 +291,9 @@ def matrix_to_tensormap(mol, dm): for key in blocks: l1,l2 = key[:2] if l1==1: - blocks[key] = np.ascontiguousarray(blocks[key][:,_pyscf2gpr_l1_order,:,:]) + blocks[key] = np.ascontiguousarray(blocks[key][:,pyscf2gpr_l1_order,:,:]) if l2==1: - blocks[key] = np.ascontiguousarray(blocks[key][:,:,_pyscf2gpr_l1_order,:]) + blocks[key] = np.ascontiguousarray(blocks[key][:,:,pyscf2gpr_l1_order,:]) # Build tensor map tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals] diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py index 35c42164..d2c9e1eb 100644 --- a/qstack/mathutils/array.py +++ b/qstack/mathutils/array.py @@ -1,6 +1,7 @@ """Array manipulation utility functions.""" import numpy as np +from qstack.tools import slice_generator def scatter(values, indices): @@ -89,9 +90,7 @@ def vstack_padding(xs): if len(np.unique(shapes_other_axes, axis=0))==1: return np.vstack(xs) X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0))) - idx = 0 - for x in xs: - slices = (np.s_[idx:idx+x.shape[0]], *(np.s_[0:s] for s in x.shape[1:])) + for x, s0 in slice_generator(xs, inc=lambda x: x.shape[0]): + slices = (s0, *(np.s_[0:s] for s in x.shape[1:])) 
X[slices] = x - idx += x.shape[0] return X diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index f3cb78e7..8ccfe052 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -8,6 +8,7 @@ from collections import Counter import numpy as np from tqdm import tqdm +from qstack.tools import slice_generator def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): @@ -83,18 +84,15 @@ def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None) numpy ndarray: Covariance matrix of shape (max_size, max_size). """ K_covar = np.zeros((max_size, max_size)) - idx = 0 - for q in species: + for q, slice_ in slice_generator(species, inc=lambda q: max_atoms[q]): n1 = len(mol1[q]) n2 = len(mol2[q]) q_size = max_atoms[q] if n1==0 or n2==0: - idx += q_size continue x1 = np.pad(mol1[q], ((0, q_size - n1),(0,0)), 'constant') x2 = np.pad(mol2[q], ((0, q_size - n2),(0,0)), 'constant') - K_covar[idx:idx+q_size, idx:idx+q_size] = kernel(x1, x2, sigma) - idx += q_size + K_covar[slice_, slice_] = kernel(x1, x2, sigma) return K_covar diff --git a/qstack/reorder.py b/qstack/reorder.py index 0b68bf50..f3ca5f4a 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -1,6 +1,14 @@ -"""Functions for reordering atomic orbitals between different conventions.""" +"""Functions for reordering atomic orbitals between different conventions. + +Provides: + pyscf2gpr_l1_order: indices to reorder l=1 orbitals from PySCF to GPR. +""" import numpy as np +from .tools import slice_generator + + +pyscf2gpr_l1_order = [1,2,0] def get_mrange(l): @@ -21,7 +29,7 @@ def get_mrange(l): return range(-l,l+1) -def _orca2gpr_idx(l, m): +def _orca2gpr_idx(l_slices, m): """Given a molecule returns a list of reordered indices to tranform Orca AO ordering into SA-GPR. In Orca, orbital ordering corresponds to: @@ -31,26 +39,22 @@ def _orca2gpr_idx(l, m): Additionally, Orca uses a different sign convention for |m|>=3. 
Args: - l (np.ndarray): Array of angular momentum quantum numbers. - m (np.ndarray): Array of magnetic quantum numbers. + l_slices (iterator): Iterator that yields (l: int, s: slice) per shell. + m (np.ndarray): Array of magnetic quantum numbers per AO. Returns: tuple: Re-arranged indices array and sign array. """ - idx = np.arange(len(l)) - i=0 - while(i < len(idx)): - msize = 2*l[i]+1 - j = np.s_[i:i+msize] - idx[j] = np.concatenate((idx[j][::-2], idx[j][1::2])) - i += msize + idx = np.arange(len(m)) + for _l, s in l_slices: + idx[s] = np.concatenate((idx[s][::-2], idx[s][1::2])) signs = np.ones_like(idx) signs[np.where(np.abs(m)>=3)] = -1 # in pyscf order signs[idx] = signs # in orca order return idx, signs -def _pyscf2gpr_idx(l): +def _pyscf2gpr_idx(l_slices, m): """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. In SA-GPR, orbital ordering corresponds to: @@ -60,18 +64,17 @@ def _pyscf2gpr_idx(l): Signs are the same in both conventions, so they are returned for compatibility. Args: - l (np.ndarray): Array of angular momentum quantum numbers. + l_slices (iterator): Iterator that yields (l: int, s: slice) per shell, where + l is angular momentum quantum number and s is the corresponding slice of size 2*l+1. + m (np.ndarray): Array of magnetic quantum numbers per AO. Returns: tuple: Re-arranged indices array and sign array. """ - idx = np.arange(len(l)) - i=0 - while(i < len(idx)): - msize = 2*l[i]+1 - if l[i]==1: - idx[i:i+3] = [i+1,i+2,i] - i += msize + idx = np.arange(len(m)) + for l, s in l_slices: + if l==1: + idx[s] = idx[s][pyscf2gpr_l1_order] return idx, np.ones_like(idx) @@ -94,23 +97,24 @@ def reorder_ao(mol, vector, src='pyscf', dest='gpr'): NotImplementedError: If the specified convention is not implemented. ValueError: If vector dimension is not 1 or 2. 
""" - def get_idx(l, m, convention): + def get_idx(L, m, convention): convention = convention.lower() + l_slices = slice_generator(L, inc=lambda l: 2*l+1) if convention == 'gpr': - return np.arange(len(l)), np.ones_like(l) + return np.arange(len(m)), np.ones_like(m) elif convention == 'pyscf': - return _pyscf2gpr_idx(l) + return _pyscf2gpr_idx(l_slices, m) elif convention == 'orca': - return _orca2gpr_idx(l, m) + return _orca2gpr_idx(l_slices, m) else: errstr = f'Conversion to/from the {convention} convention is not implemented' raise NotImplementedError(errstr) from .compound import basis_flatten - _, l, m = basis_flatten(mol, return_both=False) - idx_src, sign_src = get_idx(l, m, src) - idx_dest, sign_dest = get_idx(l, m, dest) + (_, _, m), L = basis_flatten(mol, return_both=False, return_shells=True) + idx_src, sign_src = get_idx(L, m, src) + idx_dest, sign_dest = get_idx(L, m, dest) if vector.ndim == 2: sign_src = np.einsum('i,j->ij', sign_src, sign_src) diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 45be2f99..10619c94 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -3,7 +3,7 @@ import os import itertools import numpy as np -from qstack.tools import correct_num_threads +from qstack.tools import correct_num_threads, slice_generator from . import utils, dmb_rep_bond as dmbb from . 
import dmb_rep_atom as dmba from .utils import defaults @@ -191,24 +191,21 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= ], dtype=object) else: - natm_tot = sum(len(elems) for elems in all_atoms) - allvec_new = np.empty_like(allvec, shape=(len(omods), natm_tot, maxlen)) - atm_i = 0 - for mol_i, elems in enumerate(all_atoms): - allvec_new[:, atm_i:atm_i+len(elems), :] = allvec[:, mol_i, :len(elems), :] - atm_i += len(elems) + all_atoms_list = list(itertools.chain.from_iterable(all_atoms)) + allvec_new = np.empty_like(allvec, shape=(len(omods), len(all_atoms_list), maxlen)) + for (mol_i, elems), slice_i in slice_generator([*enumerate(all_atoms)], inc=lambda x: len(x[1])): + allvec_new[:, slice_i, :] = allvec[:, mol_i, :len(elems), :] allvec = allvec_new del allvec_new - all_atoms = list(itertools.chain.from_iterable(all_atoms)) if merge: allvec = np.hstack(allvec) if with_symbols: - allvec = np.array(list(zip(all_atoms, allvec, strict=True)), dtype=object) + allvec = np.array(list(zip(all_atoms_list, allvec, strict=True)), dtype=object) else: if with_symbols: allvec = np.array([ - np.array(list(zip(all_atoms, modvec, strict=True)), dtype=object) + np.array(list(zip(all_atoms_list, modvec, strict=True)), dtype=object) for modvec in allvec ], dtype=object) diff --git a/qstack/tools.py b/qstack/tools.py index f2d2125e..480cfee9 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -101,9 +101,20 @@ def remove_argument(self, arg): return -def slice_generator(iterable, inc=lambda x: x, initial=0): +def slice_generator(iterable, inc=lambda x: x, i0=0): + """Generates slices for elements in an iterable based on increments. + + Args: + iterable (iterable): Iterable of elements to generate slices for. + inc (callable: int->int): Function that computes increment size for each element. + Defaults to identity function. + i0 (int): Initial starting index. Defaults to 0. 
+ + Yields: + tuple: (element, slice) pairs for each element in the iterable. + """ func = func=lambda total, elem: total+inc(elem) - starts = itertools.accumulate(iterable, func=func, initial=initial) + starts = itertools.accumulate(iterable, func=func, initial=i0) starts_ends = itertools.pairwise(starts) for elem, (start, end) in zip(iterable, starts_ends, strict=True): yield elem, np.s_[start:end] From 641ba72e10d89c42e9c7041f01962cd43b28483f Mon Sep 17 00:00:00 2001 From: Ksenia Date: Sun, 9 Nov 2025 22:03:38 +0100 Subject: [PATCH 20/23] Add cursor class --- qstack/compound.py | 18 ++++----- qstack/equio.py | 28 ++++++-------- qstack/fields/dm.py | 17 +++++---- qstack/orcaio.py | 6 +-- qstack/spahm/rho/Dmatrix.py | 8 ++-- qstack/tools.py | 73 +++++++++++++++++++++++++++++++++++++ 6 files changed, 106 insertions(+), 44 deletions(-) diff --git a/qstack/compound.py b/qstack/compound.py index cf32dd4c..edb4096c 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -9,6 +9,7 @@ from qstack.reorder import get_mrange from qstack.mathutils.array import stack_padding from qstack.mathutils.rotation_matrix import rotate_euler +from qstack.tools import Cursor # detects a charge-spin line, containing only two ints (one positive or negative, the other positive and nonzero) @@ -319,7 +320,7 @@ def singleatom_basis_enumerator(basis): ao_starts = [] l_per_bas = [] n_per_bas = [] - cursor = 0 + cursor = Cursor(action='ranger') cursor_per_l = [] for bas in basis: # shape of `bas`, l, then another optional constant, then lists [exp, coeff, coeff, coeff] @@ -327,17 +328,12 @@ def singleatom_basis_enumerator(basis): # and the number of primitive gaussians (one per list) l = bas[0] while len(cursor_per_l) <= l: - cursor_per_l.append(0) - + cursor_per_l.append(Cursor(action='ranger')) n_count = len(bas[-1])-1 - n_start = cursor_per_l[l] - cursor_per_l[l] += n_count - l_per_bas += [l] * n_count - n_per_bas.extend(range(n_start, n_start+n_count)) + 
n_per_bas.extend(cursor_per_l[l].add(n_count)) msize = 2*l+1 - ao_starts.extend(range(cursor, cursor+msize*n_count, msize)) - cursor += msize*n_count + ao_starts.extend(cursor.add(msize*n_count)[::msize]) return l_per_bas, n_per_bas, ao_starts @@ -365,7 +361,7 @@ def basis_flatten(mol, return_both=True, return_shells=False): x = [] L = [] y = np.zeros((3, mol.nao), dtype=int) - i = 0 + i = Cursor(action='slicer') a = mol.bas_exps() for iat in range(mol.natm): for bas_id in mol.atom_shell_ids(iat): @@ -377,7 +373,7 @@ def basis_flatten(mol, return_both=True, return_shells=False): for c in cs.T: ac = np.array([a[bas_id], c]) x.extend([ac]*msize) - y[:,i.add(msize*n)] = np.vstack((np.array([[iat, l]]*msize*n).T, [*get_mrange(l)]*n)) + y[:,i(msize*n)] = np.vstack((np.array([[iat, l]]*msize*n).T, [*get_mrange(l)]*n)) if return_shells: L.extend([l]*n) diff --git a/qstack/equio.py b/qstack/equio.py index 3dc8f422..9d95640b 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -115,26 +115,24 @@ def vector_to_tensormap(mol, c): # Fill in the blocks iq = dict.fromkeys(llists.keys(), 0) - i = 0 + i = Cursor(action='slicer') for q in atom_charges: if llists[q]==sorted(llists[q]): for l in set(llists[q]): msize = 2*l+1 nsize = blocks[(l,q)].shape[-1] - cslice = c[i:i+nsize*msize].reshape(nsize,msize).T + cslice = c[i(nsize*msize)].reshape(nsize,msize).T if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) cslice = cslice[pyscf2gpr_l1_order] blocks[(l,q)][iq[q],:,:] = cslice - i += msize*nsize else: il = dict.fromkeys(range(max(llists[q]) + 1), 0) for l in llists[q]: msize = 2*l+1 - cslice = c[i:i+msize] + cslice = c[i(msize)] if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) cslice = cslice[pyscf2gpr_l1_order] blocks[(l,q)][iq[q],:,il[l]] = cslice - i += msize il[l] += 1 iq[q] += 1 @@ -242,48 +240,44 @@ def matrix_to_tensormap(mol, dm): if all(llists[q]==sorted(llists[q]) for q in llists): iq1 = dict.fromkeys(elements, 0) - i1 = 0 + i1 = Cursor(action='slicer') for iat1, 
q1 in enumerate(atom_charges): for l1 in set(llists[q1]): msize1 = 2*l1+1 nsize1 = llists[q1].count(l1) iq2 = dict.fromkeys(elements, 0) - i2 = 0 + i1.add(nsize1*msize1) + i2 = Cursor(action='slicer') for iat2, q2 in enumerate(atom_charges): for l2 in set(llists[q2]): msize2 = 2*l2+1 nsize2 = llists[q2].count(l2) - dmslice = dm[i1:i1+nsize1*msize1,i2:i2+nsize2*msize2].reshape(nsize1,msize1,nsize2,msize2) + dmslice = dm[i1(),i2(nsize2*msize2)].reshape(nsize1,msize1,nsize2,msize2) dmslice = np.transpose(dmslice, axes=[1,3,0,2]).reshape(msize1,msize2,-1) block = tensor_blocks[tm_label_vals.index((l1,l2,q1,q2))] at_p = block.samples.position((iat1,iat2)) blocks[(l1,l2,q1,q2)][at_p,:,:,:] = dmslice - i2 += msize2*nsize2 iq2[q2] += 1 - i1 += msize1*nsize1 iq1[q1] += 1 else: iq1 = dict.fromkeys(elements, 0) - i1 = 0 + i1 = Cursor(action='slicer') for iat1, q1 in enumerate(atom_charges): il1 = dict.fromkeys(range(max(llists[q1]) + 1), 0) for l1 in llists[q1]: - msize1 = 2*l1+1 + i1.add(2*l1+1) iq2 = dict.fromkeys(elements, 0) - i2 = 0 + i2 = Cursor(action='slicer') for iat2, q2 in enumerate(atom_charges): il2 = dict.fromkeys(range(max(llists[q2]) + 1), 0) for l2 in llists[q2]: - msize2 = 2*l2+1 - dmslice = dm[i1:i1+msize1,i2:i2+msize2] + dmslice = dm[i1(),i2(2*l2+1)] block = tensor_blocks[tm_label_vals.index((l1, l2, q1, q2))] at_p = block.samples.position((iat1, iat2)) n_p = block.properties.position((il1[l1], il2[l2])) blocks[(l1,l2,q1,q2)][at_p,:,:,n_p] = dmslice - i2 += msize2 il2[l2] += 1 iq2[q2] += 1 - i1 += msize1 il1[l1] += 1 iq1[q1] += 1 diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index a79575ef..b1d0b95a 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -1,8 +1,9 @@ """Density matrix manipulation and analysis functions.""" +import numpy as np from pyscf import dft from qstack import constants -import numpy as np +from qstack.tools import Cursor def get_converged_mf(mol, xc, dm0=None, verbose=False): @@ -79,27 +80,27 @@ def 
sphericalize_density_matrix(mol, dm): A numpy ndarray with the sphericalized density matrix. """ idx_by_l = [[] for i in range(constants.MAX_L)] - i0 = 0 + i0 = Cursor(action='ranger') for ib in range(mol.nbas): l = mol.bas_angular(ib) + msize = 2*l+1 nc = mol.bas_nctr(ib) - i1 = i0 + nc * (l*2+1) - idx_by_l[l].extend(range(i0, i1, l*2+1)) - i0 = i1 + idx_by_l[l].extend(i0(nc*msize)[::msize]) spherical_dm = np.zeros_like(dm) for l in np.nonzero(idx_by_l)[0]: + msize = 2*l+1 for idx in idx_by_l[l]: for jdx in idx_by_l[l]: if l == 0: spherical_dm[idx,jdx] = dm[idx,jdx] else: trace = 0 - for m in range(2*l+1): + for m in range(msize): trace += dm[idx+m,jdx+m] - for m in range(2*l+1): - spherical_dm[idx+m,jdx+m] = trace / (2*l+1) + for m in range(msize): + spherical_dm[idx+m,jdx+m] = trace / msize return spherical_dm diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 21709c5d..61a4fa4e 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -8,6 +8,7 @@ import pyscf from qstack.mathutils.matrix import from_tril from qstack.reorder import reorder_ao +from qstack.tools import Cursor def read_input(fname, basis, ecp=None): @@ -205,10 +206,9 @@ def _get_indices(mol, ls_from_orca): indices_full = np.arange(mol.nao) for iat, ls in ls_from_orca.items(): indices = [] - i = 0 + i = Cursor(action='ranger') for il, l in enumerate(ls): - indices.append((l, il, i + np.arange(2*l+1))) - i += 2*l+1 + indices.append((l, il, i(2*l+1))) indices = sorted(indices, key=lambda x: (x[0], x[1])) indices = np.array([j for i in indices for j in i[2]]) atom_slice = np.s_[ao_limits[iat][0]:ao_limits[iat][1]] diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index dba38772..b77f38ff 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -2,6 +2,7 @@ import numpy as np from numpy import sqrt +from qstack.tools import Cursor def c_split(mol, c): @@ -19,14 +20,11 @@ def c_split(mol, c): coefficients is the subset of c for that angular momentum shell. 
""" cs = [] - i0 = 0 + slicer = Cursor(inc=lambda l: 2*l+1, action='slicer') for at0, at1 in mol.aoslice_by_atom()[:,:2]: for b in range(at0, at1): l = mol.bas_angular(b) - msize = 2*l+1 - for _n in range(mol.bas_nctr(b)): - cs.append([l, c[i0:i0+msize]]) - i0 += msize + cs.extend([[l, c[slicer(l)]] for _n in range(mol.bas_nctr(b))]) return cs diff --git a/qstack/tools.py b/qstack/tools.py index 480cfee9..2fcc06b9 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -118,3 +118,76 @@ def slice_generator(iterable, inc=lambda x: x, i0=0): starts_ends = itertools.pairwise(starts) for elem, (start, end) in zip(iterable, starts_ends, strict=True): yield elem, np.s_[start:end] + + +class Cursor: + """Cursor class to manage dynamic indexing. + + Args: + action (str): Type of indexing action ('slicer' or 'ranger'). + inc (callable: int->int): Function to determine increment size. + Defaults to identity function. + i0 (int): Initial index position. Defaults to 0. + + Attributes: + i (int): Current index position. + i_prev (int): Previous index position. + cur (range or slice: Current range or slice. + inc (callable int->int): Increment function. + + Methods: + add(di): Advances the cursor by increment and returns current range/slice. + __call__(di=None): Advances the cursor if di is not None, + returns current range/slice. + """ + def __init__(self, action='slicer', inc=lambda x: x, i0=0): + self.i = i0 + self.i_prev = None + self.inc = inc + self.cur = None + self.action = action + self.actions_dict = {'slicer': self._slicer, 'ranger': self._ranger} + + def add(self, di): + """Advances the cursor and returns the current range or slice. + Args: + di: Element to determine increment size. + Returns: + Current range or slice after advancing. 
+ """ + self._add(di) + self.cur = self.actions_dict[self.action]() + return self.cur + + def _ranger(self): + return range(self.i_prev, self.i) + + def _slicer(self): + return np.s_[self.i_prev:self.i] + + def _add(self, di): + self.i_prev = self.i + self.i += self.inc(di) + + def __call__(self, di=None): + """Optionally advance the cursor and return the current range or slice. + + If the argument is passed, it is used to advance the cursor. + If not, the current value is returned. + + Args: + di (optional): Element to determine increment size. + + Returns: + Current range or slice (after advancing). + """ + if di is None: + return self.cur + else: + return self.add(di) + + def __str__(self): + return str(self.i) + + def __repr__(self): + return str(self.i) From 998ccaa42eadc202f847914b9b40e6845e3729d0 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Mon, 10 Nov 2025 18:16:50 +0100 Subject: [PATCH 21/23] Fix docstring using ruff --- qstack/basis_opt/basis_tools.py | 4 +- qstack/basis_opt/opt.py | 4 +- qstack/compound.py | 17 +++--- qstack/constants.py | 3 +- qstack/equio.py | 8 +-- qstack/fields/decomposition.py | 12 ++-- qstack/fields/density2file.py | 12 ++-- qstack/fields/dm.py | 6 +- qstack/fields/dori.py | 14 ++--- qstack/fields/excited.py | 14 ++--- qstack/fields/hf_otpd.py | 8 +-- qstack/fields/hirshfeld.py | 6 +- qstack/fields/moments.py | 29 ++++++---- qstack/mathutils/array.py | 2 +- qstack/mathutils/rotation_matrix.py | 8 +-- qstack/mathutils/wigner.py | 11 ++-- qstack/mathutils/xyz_integrals_float.py | 4 +- qstack/mathutils/xyz_integrals_sym.py | 6 +- qstack/orcaio.py | 3 +- qstack/qml/b2r2.py | 18 +++--- qstack/qml/slatm.py | 17 +++--- qstack/regression/condition.py | 5 +- qstack/regression/cross_validate_results.py | 4 +- qstack/regression/final_error.py | 5 +- qstack/regression/global_kernels.py | 14 ++--- qstack/regression/hyperparameters.py | 7 ++- qstack/regression/kernel.py | 2 +- qstack/regression/kernel_utils.py | 35 +++++------- 
qstack/regression/local_kernels.py | 13 +++-- qstack/regression/oos.py | 2 +- qstack/regression/parser.py | 27 ++++++++- qstack/regression/regression.py | 5 +- qstack/reorder.py | 3 +- qstack/spahm/LB2020guess.py | 33 +++++------ qstack/spahm/compute_spahm.py | 10 ++-- qstack/spahm/guesses.py | 30 +++++----- qstack/spahm/rho/Dmatrix.py | 13 ++--- qstack/spahm/rho/atom.py | 4 +- qstack/spahm/rho/atomic_density.py | 2 +- qstack/spahm/rho/bond.py | 4 +- qstack/spahm/rho/bond_selected.py | 6 +- qstack/spahm/rho/compute_rho_spahm.py | 8 +-- qstack/spahm/rho/dmb_rep_atom.py | 22 +++++--- qstack/spahm/rho/dmb_rep_bond.py | 27 ++++----- qstack/spahm/rho/lowdin.py | 6 +- qstack/spahm/rho/sym.py | 26 +++++---- qstack/spahm/rho/utils.py | 39 +++++++------ qstack/tools.py | 17 +++--- ruff.toml | 61 ++++++++++++++++++++- 49 files changed, 373 insertions(+), 263 deletions(-) diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index 8afb57e6..5f6fcaef 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -6,7 +6,7 @@ def energy_mol(newbasis, moldata): - """Computes loss function (fitting error) for one molecule. + """Compute loss function (fitting error) for one molecule. Args: newbasis (dict): Basis set. @@ -32,7 +32,7 @@ def energy_mol(newbasis, moldata): def gradient_mol(nexp, newbasis, moldata): - """Computes loss function and gradient for one molecule. + """Compute loss function and gradient for one molecule. Args: nexp (int): Number of exponents. diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index 3cfd9cbc..f0140743 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -12,7 +12,7 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False): - """ Optimize a given basis set. + """Optimize a given basis set. Args: elements_in (str): List of elements to optimize. If None, optimize all elements in the basis. 
@@ -233,7 +233,7 @@ def make_moldata(fname): def main(): - """Main function for basis set optimization command-line interface.""" + """Run basis set optimization via command-line interface.""" parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.') parser.add_argument('--elements', type=str, dest='elements', nargs='+', help='elements for optimization') parser.add_argument('--basis', type=str, dest='basis', nargs='+', help='initial df bases', required=True) diff --git a/qstack/compound.py b/qstack/compound.py index edb4096c..2c6470de 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -27,7 +27,7 @@ def xyz_comment_line_parser(line): - """Reads the 'comment' line of a XYZ file and tries to infer its meaning. + """Read the 'comment' line of a XYZ file and tries to infer its meaning. Args: line (str): Comment line from XYZ file. @@ -91,7 +91,7 @@ def xyz_comment_line_parser(line): def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit=None, ecp=None, parse_comment=False): - """Reads a molecular file in xyz format and returns a pyscf Mole object. + """Read a molecular file in xyz format and returns a pyscf Mole object. Args: inp (str): Path of the xyz file to read, or xyz file contents. @@ -170,7 +170,7 @@ def xyz_to_mol(inp, basis="def2-svp", charge=None, spin=None, ignore=False, unit def mol_to_xyz(mol, fout, fmt="xyz"): - """Converts a pyscf Mole object into a molecular file in xyz format. + """Convert a pyscf Mole object into a molecular file in xyz format. Args: mol (pyscf.gto.Mole): pyscf Mole object. @@ -199,7 +199,7 @@ def mol_to_xyz(mol, fout, fmt="xyz"): def make_auxmol(mol, basis, copy_ecp=False): - """Builds an auxiliary Mole object given a basis set and a pyscf Mole object. + """Build an auxiliary Mole object given a basis set and a pyscf Mole object. Args: mol (pyscf.gto.Mole): Original pyscf Mole object. 
@@ -245,7 +245,7 @@ def rotate_molecule(mol, a, b, g, rad=False): def fragments_read(frag_file): - """Loads fragment definition from a file. + """Load fragment definition from a file. Args: frag_file (str): Path to the fragment file containing space-separated atom indices (1-based). @@ -259,7 +259,7 @@ def fragments_read(frag_file): def fragment_partitioning(fragments, prop_atom_inp, normalize=True): - """Computes the contribution of each fragment. + """Compute the contribution of each fragment. Args: fragments (list): Fragment definition as list of numpy arrays. @@ -304,7 +304,7 @@ def make_atom(q, basis, ecp=None): def singleatom_basis_enumerator(basis): - """Enumerates the different tensors of atomic orbitals within a 1-atom basis set. + """Enumerate the different tensors of atomic orbitals within a 1-atom basis set. Each tensor is a 2l+1-sized group of orbitals that share a radial function and l value. @@ -338,11 +338,12 @@ def singleatom_basis_enumerator(basis): def basis_flatten(mol, return_both=True, return_shells=False): - """Flattens a basis set definition for AOs. + """Flatten a basis set definition for AOs. Args: mol (pyscf.gto.Mole): pyscf Mole object. return_both (bool): Whether to return both AO info and primitive Gaussian info. Defaults to True. + return_shells (bool): Whether to return angular momenta per shell. Defaults to False. Returns: - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: diff --git a/qstack/constants.py b/qstack/constants.py index b5d7176d..5f3affc2 100644 --- a/qstack/constants.py +++ b/qstack/constants.py @@ -1,5 +1,4 @@ -""" -NIST physical constants and unit conversion +"""NIST physical constants and unit conversion. 
https://physics.nist.gov/cuu/Constants/ https://physics.nist.gov/cuu/Constants/Table/allascii.txt diff --git a/qstack/equio.py b/qstack/equio.py index 9d95640b..aa025aee 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -42,7 +42,7 @@ def _get_llist(mol): def _get_tsize(tensor): - """Computes the size of a tensor. + """Compute the size of a tensor. Args: tensor (metatensor.TensorMap): Tensor. @@ -54,7 +54,7 @@ def _get_tsize(tensor): def _labels_to_array(labels): - """Represents a set of metatensor labels as an array. + """Represent a set of metatensor labels as an array. Args: labels (metatensor.Labels): Labels object. @@ -68,7 +68,7 @@ def _labels_to_array(labels): def vector_to_tensormap(mol, c): - """Transforms an vector into a tensor map. + """Transform a vector into a tensor map. Each element of the vector corresponds to an atomic orbital of the molecule. @@ -181,7 +181,7 @@ def tensormap_to_vector(mol, tensor): def matrix_to_tensormap(mol, dm): - """Transforms a matrix into a tensor map. + """Transform a matrix into a tensor map. Each element of the matrix corresponds to a pair of atomic orbitals. diff --git a/qstack/fields/decomposition.py b/qstack/fields/decomposition.py index 38c91179..91ba4b2a 100644 --- a/qstack/fields/decomposition.py +++ b/qstack/fields/decomposition.py @@ -27,7 +27,7 @@ def decompose(mol, dm, auxbasis): def get_integrals(mol, auxmol): - """Computes overlap integrals and 2-/3-centers ERI matrices. + """Compute overlap integrals and 2-/3-centers ERI matrices. Args: mol (pyscf Mole): pyscf Mole object used for the computation of the density matrix. @@ -48,7 +48,7 @@ def get_integrals(mol, auxmol): def get_self_repulsion(mol_or_mf, dm): - r"""Computes the self-repulsion of the density. + r"""Compute the self-repulsion of the density.
\int \int \rho_DM(r1) 1/|r1-r2| \rho_DM(r2) dr1 dr2 @@ -67,7 +67,7 @@ def get_self_repulsion(mol_or_mf, dm): def optimal_decomposition_error(self_repulsion, c, eri2c): - r"""Computes the decomposition error for optimal density fitting. + r"""Compute the decomposition error for optimal density fitting. \int \int \rho_DM(r1) 1/|r1-r2| \rho_DF(r2) dr1 dr2 @@ -87,7 +87,7 @@ def optimal_decomposition_error(self_repulsion, c, eri2c): def decomposition_error(self_repulsion, c, eri2c, eri3c, dm): - r"""Computes the decomposition error for optimal density fitting. + r"""Compute the decomposition error for optimal density fitting. \int \int \rho_DM(r1) 1/|r1-r2| \rho_DF(r2) dr1 dr2 @@ -110,7 +110,7 @@ def decomposition_error(self_repulsion, c, eri2c, eri3c, dm): def get_coeff(dm, eri2c, eri3c, slices=None): - """Computes the density expansion coefficients. + """Compute the density expansion coefficients. Args: dm (numpy ndarray): Density matrix. @@ -143,7 +143,7 @@ def get_coeff(dm, eri2c, eri3c, slices=None): def _get_inv_metric(mol, metric, v): - """Computes the inverse metric applied to a vector. + """Compute the inverse metric applied to a vector. Args: mol (pyscf Mole): pyscf Mole object. diff --git a/qstack/fields/density2file.py b/qstack/fields/density2file.py index e6a72938..686f70a1 100644 --- a/qstack/fields/density2file.py +++ b/qstack/fields/density2file.py @@ -8,7 +8,7 @@ def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, margin=3.0): - """Saves the electron density to a cube file. + """Save the electron density to a cube file. Args: mol (pyscf Mole): pyscf Mole object. @@ -20,8 +20,8 @@ def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, m resolution (float): Grid spacing in Bohr. Defaults to 0.1. margin (float): Extra space around molecule in Bohr. Defaults to 3.0. - Returns: - None: Creates a file named .cube on disk. + Output: + Creates a file named .cube on disk. 
""" grid = Cube(mol, nx, ny, nz, resolution, margin) coords = grid.get_coords() @@ -32,15 +32,15 @@ def coeffs_to_cube(mol, coeffs, cubename, nx=80, ny=80, nz=80, resolution=0.1, m def coeffs_to_molden(mol, coeffs, moldenname): - """Saves the electron density to a MOLDEN file. + """Save the electron density to a MOLDEN file. Args: mol (pyscf Mole): pyscf Mole object. coeffs (numpy ndarray): 1D array of density expansion coefficients. moldenname (str): Output filename for the MOLDEN file. - Returns: - None: Creates a file named .molden on disk. + Output: + Creates a file named .molden on disk. """ with open(moldenname, 'w') as f: N = moments.r2_c(mol, coeffs, moments=[0])[0] diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index b1d0b95a..2b77c863 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -7,7 +7,7 @@ def get_converged_mf(mol, xc, dm0=None, verbose=False): - """Performs SCF calculation. + """Perform SCF calculation. Args: mol (pyscf Mole): pyscf Mole object. @@ -40,7 +40,7 @@ def get_converged_mf(mol, xc, dm0=None, verbose=False): def get_converged_dm(mol, xc, verbose=False): - """Wrapper around get_converged_mf to get the DM. + """Get a converged density matrix. Args: mol (pyscf Mole): pyscf Mole object. @@ -54,7 +54,7 @@ def get_converged_dm(mol, xc, verbose=False): def make_grid_for_rho(mol, grid_level=3): - """Generates a grid of real space coordinates and weights for integration. + """Generate a grid of real space coordinates and weights for integration. Args: mol (pyscf Mole): pyscf Mole object. diff --git a/qstack/fields/dori.py b/qstack/fields/dori.py index a15d96bd..76abddfa 100644 --- a/qstack/fields/dori.py +++ b/qstack/fields/dori.py @@ -8,7 +8,7 @@ def eval_rho_dm(mol, ao, dm, deriv=2): - """Calculates electron density and its derivatives from a density matrix. + """Compute electron density and its derivatives from a density matrix. 
Modified from pyscf/dft/numint.py to return full second derivative matrices needed for DORI calculations. @@ -53,7 +53,7 @@ def eval_rho_dm(mol, ao, dm, deriv=2): def eval_rho_df(ao, c, deriv=2): - """Calculates electron density and its derivatives from density-fitting coefficients. + """Compute electron density and its derivatives from density-fitting coefficients. Args: ao (numpy ndarray): 3D array of shape (*, ngrids, nao) where: @@ -83,7 +83,7 @@ def eval_rho_df(ao, c, deriv=2): def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): - """Wrapper to calculate electron density and derivatives efficiently. + """Calculate electron density and derivatives efficiently. Computes density and its spatial derivatives on a grid from either a density matrix or fitting coefficients, with optimizations for numerical stability. @@ -132,7 +132,7 @@ def compute_rho(mol, coords, dm=None, c=None, deriv=2, eps=1e-4): def compute_s2rho(rho, d2rho_dr2, eps=1e-4): - """Computes signed density based on second eigenvalue of the density Hessian. + """Compute signed density based on second eigenvalue of the density Hessian. Useful for distinguishing bonding vs. non-bonding regions. The sign of the second eigenvalue of the Hessian indicates local density topology. @@ -153,7 +153,7 @@ def compute_s2rho(rho, d2rho_dr2, eps=1e-4): def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): - """Computes Density Overlap Regions Indicator (DORI) analytically. + """Compute Density Overlap Regions Indicator (DORI) analytically. Args: rho (numpy ndarray): 1D array (ngrids,) of electron density. @@ -181,7 +181,7 @@ def compute_dori(rho, drho_dr, d2rho_dr2, eps=1e-4): def compute_dori_num(mol, coords, dm=None, c=None, eps=1e-4, dx=1e-4): - """Computes DORI using numerical differentiation (semi-numerical approach). + """Compute DORI using numerical differentiation (semi-numerical approach). Alternative to analytical DORI calculation using finite differences for derivatives of k², where k=dρ/dr. 
Useful for validation or when analytical @@ -229,7 +229,7 @@ def compute_k2(coords, mol=None, dm=None, c=None): def dori_on_grid(mol, coords, dm=None, c=None, eps=1e-4, alg='analytical', mem=1, dx=1e-4, progress=False): - """Computes DORI on a user-specified grid with memory management. + """Compute DORI on a user-specified grid with memory management. Main computational function for DORI evaluation. Handles large grids by chunking based on available memory. diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index 79d43799..c52bb771 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -5,7 +5,7 @@ def get_cis(mf, nstates): - """Wrapper for CIS (Configuration interaction singles) / TDA (Tamm-Dancoff approximation) computation. + """Run a CIS (Configuration interaction singles) / TDA (Tamm-Dancoff approximation) computation. Args: mf: Pyscf mean-field object. @@ -23,7 +23,7 @@ def get_cis(mf, nstates): def get_cis_tdm(td): - """Extracts transition density matrices from TDA/CIS calculation. + """Extract transition density matrices from TDA/CIS calculation. Args: td: TDA/CIS calculation object containing excitation amplitudes. @@ -35,7 +35,7 @@ def get_cis_tdm(td): def get_holepart(mol, x, coeff): - """Computes the hole and particle density matrices (in AO basis) for a selected state. + """Compute the hole and particle density matrices (in AO basis) for a selected state. Args: mol (pyscf Mole): pyscf Mole object. @@ -56,7 +56,7 @@ def mo2ao(mat, coeff): def get_transition_dm(mol, x_mo, coeff): - """Computes the transition density matrix for a selected state. + """Compute the transition density matrix for a selected state. Args: mol (pyscf Mole): pyscf Mole object. @@ -72,7 +72,7 @@ def get_transition_dm(mol, x_mo, coeff): def exciton_properties_c(mol, hole, part): - """Computes the decomposed/predicted hole-particle distance, the hole size, and the particle size. 
+ """Compute the decomposed/predicted hole-particle distance, the hole size, and the particle size. Args: mol (pyscf Mole): pyscf Mole object. @@ -95,7 +95,7 @@ def exciton_properties_c(mol, hole, part): def exciton_properties_dm(mol, hole, part): - """Computes the ab initio hole-particle distance, the hole size, and the particle size. + """Compute the ab initio hole-particle distance, the hole size, and the particle size. Args: mol (pyscf Mole): pyscf Mole object. @@ -124,7 +124,7 @@ def exciton_properties_dm(mol, hole, part): def exciton_properties(mol, hole, part): - """Computes the ab initio or decomposed/predicted hole-particle distance, the hole size, and the particle size. + """Compute the ab initio or decomposed/predicted hole-particle distance, the hole size, and the particle size. Distance is defined as |_hole - _part|, and size as sqrt( - ^2). diff --git a/qstack/fields/hf_otpd.py b/qstack/fields/hf_otpd.py index e8a7e292..850cfb79 100644 --- a/qstack/fields/hf_otpd.py +++ b/qstack/fields/hf_otpd.py @@ -6,7 +6,7 @@ def hf_otpd(mol, dm, grid_level=3, save_otpd=False, return_all=False): - """Computes the Hartree-Fock uncorrelated on-top pair density (OTPD) on a grid. + """Compute the Hartree-Fock uncorrelated on-top pair density (OTPD) on a grid. The on-top pair density is the probability density of finding two electrons at the same position. For Hartree-Fock, this is computed as (rho/2)^2. @@ -38,7 +38,7 @@ def hf_otpd(mol, dm, grid_level=3, save_otpd=False, return_all=False): def save_OTPD(mol, otpd, grid): - """Saves on-top pair density computation results to a NumPy compressed file. + """Save on-top pair density computation results to a NumPy compressed file. Creates a .npz file containing the molecular structure, OTPD values, grid coordinates, and integration weights for later analysis. @@ -48,8 +48,8 @@ def save_OTPD(mol, otpd, grid): otpd (numpy ndarray): 1D array of on-top pair density values on the grid. 
grid (pyscf Grid): Grid object containing coordinates and weights. - Returns: - None: Creates a file named _otpd_data.npz on disk. + Output: + Creates a file named _otpd_data.npz on disk. """ output = ''.join(mol.elements)+"_otpd_data" np.savez(output, atom=mol.atom, rho=otpd, coords=grid.coords, weights=grid.weights) diff --git a/qstack/fields/hirshfeld.py b/qstack/fields/hirshfeld.py index ffaa64ec..81038c65 100644 --- a/qstack/fields/hirshfeld.py +++ b/qstack/fields/hirshfeld.py @@ -6,7 +6,7 @@ def spherical_atoms(elements, atm_bas): - """Computes density matrices for spherically averaged isolated atoms. + """Compute density matrices for spherically averaged isolated atoms. For each element, creates an isolated atom calculation with appropriate spin and computes its density matrix using atomic Hartree-Fock initial guess. @@ -26,7 +26,7 @@ def spherical_atoms(elements, atm_bas): def _hirshfeld_weights(mol, grid_coord, atm_dm, atm_bas, dominant): - """Computes Hirshfeld partitioning weights for each atom at grid points. + """Compute Hirshfeld partitioning weights for each atom at grid points. Hirshfeld partitioning divides the molecular density among atoms based on their promolecular (free atom) densities. Dominant partitioning assigns @@ -69,7 +69,7 @@ def _hirshfeld_weights(mol, grid_coord, atm_dm, atm_bas, dominant): def hirshfeld_charges(mol, cd, dm_atoms=None, atm_bas=None, dominant=True, occupations=False, grid_level=3): - """Computes atomic charges or occupations using Hirshfeld partitioning. + """Compute atomic charges or occupations using Hirshfeld partitioning. Partitions the molecular electron density among atoms using Hirshfeld weights based on free atom densities. 
Can work with either density-fitting coefficients diff --git a/qstack/fields/moments.py b/qstack/fields/moments.py index a5e6bfc3..f784d6a2 100644 --- a/qstack/fields/moments.py +++ b/qstack/fields/moments.py @@ -6,7 +6,7 @@ def first(mol, rho): - r"""Wrapper to compute the first moment of a molecular density needed for dipole moments. + r"""Compute the first moment of a molecular density needed for dipole moments. $$\int r \rho(r) dr$$ @@ -16,6 +16,9 @@ def first(mol, rho): Returns: numpy ndarray: Electronic dipole moment vector (3 components). + + Raises: + RuntimeError: If `rho` is not 1D or 2D. """ if rho.ndim==1: return r2_c(mol, rho, moments=(1,))[0] @@ -26,7 +29,7 @@ def first(mol, rho): def r_dm(mol, dm): - """Computes the first moment of a density matrix. + """Compute the first moment of a density matrix. Args: mol (pyscf Mole): pyscf Mole object. @@ -42,24 +45,25 @@ def r_dm(mol, dm): def r2_c(mol, rho, moments=(0,1,2), per_atom=False): - """Compute the zeroth ( :math:`<1>` ), first ( :math:`<r>` ), and second ( :math:`<r^{2}>`) moments of a fitted density. + r"""Compute the zeroth ( :math:`<1>` ), first ( :math:`<r>` ), and second ( :math:`<r^{2}>`) moments of a fitted density. .. math:: - <1> = \\int \\rho d r - \\quad + <1> = \int \rho d r + \quad ; - \\quad - <r> = \\int \\hat{r} \\rho d r - \\quad + \quad + <r> = \int \hat{r} \rho d r + \quad ; - \\quad - <r^{2}> = \\int \\hat{r}^{2} \\rho d r + \quad + <r^{2}> = \int \hat{r}^{2} \rho d r Args: mol (pyscf Mole): pyscf Mole object. rho (numpy ndarray): 1D array of density-fitting coefficients. Can be None to compute AO integrals instead. moments (tuple): Moments to compute (0, 1, and/or 2). + per_atom (bool): Whether to return AO integrals / moments per atom.
Returns: tuple: If rho!=None, values representing the requested moments, possibly containing: @@ -83,9 +87,12 @@ def r2_c(mol, rho, moments=(0,1,2), per_atom=False): 0st moment: (mol.natm,) 1st moment: (3, mol.natm) 2nd moment: (mol.natm,) + + Raises: + NotImplementedError: If a moment > 2 is requested. """ if max(moments)>2: - raise RuntimeError('Only moments 0, 1, and 2 are supported.') + raise NotImplementedError('Only moments 0, 1, and 2 are supported.') ret = {} (iat, l, _), (a, c) = basis_flatten(mol) diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py index d2c9e1eb..9647126e 100644 --- a/qstack/mathutils/array.py +++ b/qstack/mathutils/array.py @@ -28,7 +28,7 @@ def scatter(values, indices): def safe_divide(a, b): - """Wrapper for numpy divide that avoids division by zero. + """Divide numpy arrays avoiding division by zero. Args: a (numpy.ndarray): Numerator array. diff --git a/qstack/mathutils/rotation_matrix.py b/qstack/mathutils/rotation_matrix.py index d76c67f8..c4cf40b5 100644 --- a/qstack/mathutils/rotation_matrix.py +++ b/qstack/mathutils/rotation_matrix.py @@ -4,7 +4,7 @@ def _Rz(a): - """Computes the rotation matrix around laboratory z-axis. + """Compute the rotation matrix around laboratory z-axis. Args: a (float): Rotation angle in radians. @@ -21,7 +21,7 @@ def _Rz(a): def _Ry(b): - """Computes the rotation matrix around laboratory y-axis. + """Compute the rotation matrix around laboratory y-axis. Args: b (float): Rotation angle in radians. @@ -38,7 +38,7 @@ def _Ry(b): def _Rx(g): - """Computes the rotation matrix around laboratory x-axis. + """Compute the rotation matrix around laboratory x-axis. Args: g (float): Rotation angle in radians. @@ -55,7 +55,7 @@ def _Rx(g): def rotate_euler(a, b, g, rad=False): - """Computes the rotation matrix given Cardan angles (x-y-z) + """Compute the rotation matrix given Cardan angles (x-y-z). Args: a (float): Alpha Euler angle. 
diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index d3a4cc16..dc5037a8 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -18,7 +18,7 @@ def real_Y_correct_phase(l, m, theta, phi): - """Returns real spherical harmonic in Condon--Shortley phase convention. + """Return real spherical harmonic in Condon--Shortley phase convention. Note: sympy's Znm uses a different convention. @@ -42,7 +42,7 @@ def real_Y_correct_phase(l, m, theta, phi): def get_polynom_Y(l, m): - """Rewrites a real spherical harmonic as a polynomial of x, y, z. + """Rewrite a real spherical harmonic as a polynomial of x, y, z. Args: l (int): Orbital angular momentum quantum number. @@ -68,7 +68,7 @@ def get_polynom_Y(l, m): def xyzint_wrapper(knm, integrals_xyz_dict): - """Wrapper for xyz integrals with caching. + """Compute xyz integrals with caching. Computes the integral of x^k * y^n * z^m over the unit sphere. Integral is zero if any power is odd. @@ -91,7 +91,7 @@ def xyzint_wrapper(knm, integrals_xyz_dict): def product_Y(Y1,Y2): - """Computes the product of two spherical harmonics. + """Compute the product of two spherical harmonics. Args: Y1 (sympy.Expr): First spherical harmonic polynomial. @@ -122,8 +122,7 @@ def print_wigner(D): def compute_wigner(lmax): - """Compute Wigner D matrices for real spherical harmonics - up to a maximum angular momentum. + """Compute Wigner D matrices for real spherical harmonics up to a maximum angular momentum. Args: lmax (int): Maximum angular momentum quantum number. diff --git a/qstack/mathutils/xyz_integrals_float.py b/qstack/mathutils/xyz_integrals_float.py index 4d0543b7..26374f57 100755 --- a/qstack/mathutils/xyz_integrals_float.py +++ b/qstack/mathutils/xyz_integrals_float.py @@ -6,7 +6,7 @@ def xyz(n, m, k): - """Computes the integral of x^2k y^2n z^2m over a unit sphere. + """Compute the integral of x^2k y^2n z^2m over a unit sphere. Args: n (int): Half of power of y. 
@@ -48,7 +48,7 @@ def I23(n,m,k): def trinomial(k1, k2, k3): - """Computes the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). + """Compute the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). Args: k1 (int) diff --git a/qstack/mathutils/xyz_integrals_sym.py b/qstack/mathutils/xyz_integrals_sym.py index 8c37ad0d..c1b07929 100755 --- a/qstack/mathutils/xyz_integrals_sym.py +++ b/qstack/mathutils/xyz_integrals_sym.py @@ -15,7 +15,7 @@ def xyz(n, m, k): - """Computes the integral of x^2k y^2n z^2m over a unit sphere. + """Compute the integral of x^2k y^2n z^2m over a unit sphere. Args: n (int): Half of power of y. @@ -38,7 +38,7 @@ def xyz(n, m, k): def I23(n,m,k): - """Computes an auxiliary integral needed for the integral over the unit sphere. + """Compute an auxiliary integral needed for the integral over the unit sphere. Args: n (int) @@ -60,7 +60,7 @@ def I23(n,m,k): def trinomial(k1, k2, k3): - """Computes the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). + """Compute the trinomial coefficient (k1+k2+k3)! / (k1! * k2! * k3!). Args: k1 (int) diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 61a4fa4e..184ce968 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -1,6 +1,7 @@ """ORCA quantum chemistry package I/O utilities. -Read and parse ORCA output files, including orbitals and densities binary files.""" +Read and parse ORCA output files, including orbitals and densities binary files. +""" import warnings import struct diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index aa25e017..7e498cb1 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -1,7 +1,7 @@ """Bond-based reaction representation (B2R2) for chemical reactions. Provides: - - defaults: default parameters for B2R2 computation. + defaults: default parameters for B2R2 computation. """ import itertools @@ -15,7 +15,7 @@ def get_bags(unique_ncharges): - """Generates all unique element pair combinations including self-interactions. 
+ """Generate all unique element pair combinations including self-interactions. Args: unique_ncharges (array-like): Array of unique atomic charges/numbers. @@ -47,7 +47,7 @@ def get_mu_sigma(R): def get_gaussian(x, R): - """Computes Gaussian function values for a given interatomic distance. + """Compute Gaussian function values for a given interatomic distance. Args: x (numpy ndarray): Grid points to evaluate the Gaussian. @@ -63,7 +63,7 @@ def get_gaussian(x, R): def get_skew_gaussian_l_both(x, R, Z_I, Z_J): - """Computes skewed Gaussian distributions for B2R2_l representation. + """Compute skewed Gaussian distributions for B2R2_l representation. Args: x (numpy ndarray): Grid points to evaluate the functions. @@ -89,7 +89,7 @@ def get_skew_gaussian_l_both(x, R, Z_I, Z_J): def get_skew_gaussian_n_both(x, R, Z_I, Z_J): - """Computes combined skewed Gaussian distribution for B2R2_n representation. + """Compute combined skewed Gaussian distribution for B2R2_n representation. Args: x (numpy ndarray): Grid points to evaluate the function. @@ -116,7 +116,7 @@ def get_skew_gaussian_n_both(x, R, Z_I, Z_J): def get_b2r2_n_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): - """Computes B2R2_n representation for a single molecule. + """Compute B2R2_n representation for a single molecule. Args: ncharges (array-like): Atomic numbers for all atoms in the molecule. @@ -148,7 +148,7 @@ def get_b2r2_n_molecular(ncharges, coords, elements, def get_b2r2_a_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): - """Computes B2R2_a representation for a single molecule. + """Compute B2R2_a representation for a single molecule. Args: ncharges (array-like): Atomic numbers for all atoms in the molecule. 
@@ -185,7 +185,7 @@ def get_b2r2_a_molecular(ncharges, coords, elements, def get_b2r2_l_molecular(ncharges, coords, elements, rcut=defaults.rcut, gridspace=defaults.gridspace): - """Computes B2R2_l representation for a single molecule. + """Compute B2R2_l representation for a single molecule. Args: ncharges (array-like): Atomic numbers for all atoms in the molecule. @@ -270,7 +270,7 @@ def get_b2r2_inner(reactions, progress=False, rcut=defaults.rcut, gridspace=defaults.gridspace, get_b2r2_molecular=None, combine=None): - """Computes the B2R2 representations for a list of reactions. + """Compute the B2R2 representations for a list of reactions. Internal implementation function that computes B2R2 representations using provided molecular representation function and combination strategy. diff --git a/qstack/qml/slatm.py b/qstack/qml/slatm.py index d680a872..d1386753 100644 --- a/qstack/qml/slatm.py +++ b/qstack/qml/slatm.py @@ -1,7 +1,7 @@ """Spectrum of London and Axilrod-Teller-Muto potential (SLATM) representation. Provides: - - defaults: Default parameters for SLATM representation. + defaults: Default parameters for SLATM representation. """ import itertools @@ -15,7 +15,7 @@ def get_mbtypes(qs, qml=False): - """Generates many-body types (elements, pairs, triples) for SLATM representation. + """Generate many-body types (elements, pairs, triples) for SLATM representation. Args: qs (list): List of atomic number arrays for all molecules. @@ -54,7 +54,7 @@ def get_mbtypes(qs, qml=False): def get_two_body(i, mbtype, q, dist, r0=defaults.r0, rcut=defaults.rcut, sigma=defaults.sigma2, dgrid=defaults.dgrid2): - """Computes two-body London dispersion contribution for atom i. + """Compute two-body London dispersion contribution for atom i. Evaluates the two-body term from pairwise 1/r^6 London dispersion interactions, projected onto a radial grid with Gaussian broadening of interatomic distances. 
@@ -72,7 +72,6 @@ def get_two_body(i, mbtype, q, dist, Returns: numpy.ndarray: Two-body contribution on radial grid (ngrid,). """ - ngrid = int((rcut - r0)/dgrid) + 1 rgrid = np.linspace(r0, rcut, ngrid) @@ -104,7 +103,7 @@ def get_two_body(i, mbtype, q, dist, def get_three_body(j, mbtype, q, r, dist, rcut=defaults.rcut, theta0=defaults.theta0, sigma=defaults.sigma3, dgrid=defaults.dgrid3): - """Computes three-body Axilrod-Teller-Muto contribution for atom j. + """Compute three-body Axilrod-Teller-Muto contribution for atom j. Evaluates the three-body ATM term from triple-dipole interactions, projected onto an angular grid with Gaussian broadening of bond angles. @@ -125,7 +124,7 @@ def get_three_body(j, mbtype, q, r, dist, """ def get_cos(a, b, c): - """Computes cosine of angle abc from atomic positions. + """Compute cosine of angle abc from atomic positions. Args: a (int): Index of first atom. @@ -177,7 +176,7 @@ def get_slatm(q, r, mbtypes, qml_compatible=True, stack_all=True, global_repr=False, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """Computes SLATM representation for a single molecule. + """Compute SLATM representation for a single molecule. Constructs the SLATM (Spectrum of London and Axilrod-Teller-Muto potential) representation by combining one-body (nuclear charges), two-body (London dispersion), @@ -271,7 +270,7 @@ def get_slatm_for_dataset(molecules, qml_mbtypes=True, qml_compatible=True, stack_all=True, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """Computes the (a)SLATM representation for a set of molecules. + """Compute the (a)SLATM representation for a set of molecules. Generates SLATM descriptors for molecular datasets, automatically determining many-body types from all molecules. 
@@ -326,7 +325,7 @@ def get_slatm_for_dataset(molecules, def get_slatm_rxn(reactions, progress=False, qml_mbtypes=True, r0=defaults.r0, rcut=defaults.rcut, sigma2=defaults.sigma2, dgrid2=defaults.dgrid2, theta0=defaults.theta0, sigma3=defaults.sigma3, dgrid3=defaults.dgrid3): - """Computes the SLATM_d representation for chemical reactions. + """Compute the SLATM_d representation for chemical reactions. Calculates reaction representations as the difference between product and reactant SLATM descriptors (ΔR = R_products - R_reactants), suitable for predicting diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index e909afa4..b1cedb03 100644 --- a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -11,7 +11,7 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, idx_test=None, idx_train=None, sparse=None, random_state=defaults.random_state): - """ Compute kernel matrix condition number + """Compute kernel matrix condition number. Args: X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. @@ -29,6 +29,9 @@ def condition(X, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, Returns: float: Condition number. + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. 
""" idx_train, _, _, _ = train_test_split_idx(y=np.arange(len(X)), idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index 5481c918..034b2dcf 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -16,7 +16,7 @@ def cv_results(X, y, adaptive=False, read_kernel=False, n_rep=defaults.n_rep, save=False, preffix='unknown', save_pred=False, progress=False, sparse=None, seed0=0): - """ Computes various learning curves (LC) ,with random sampling, and returns the average performance. + """Compute various learning curves (LC) ,with random sampling, and returns the average performance. Args: X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. @@ -30,7 +30,7 @@ def cv_results(X, y, train_size (list): List of training set size fractions used to evaluate the points on the LC. splits (int): K number of splits for the Kfold cross-validation. printlevel (int): Controls level of output printing. - adaptative (bool): To expand the grid for optimization adaptatively. + adaptive (bool): To expand the grid for optimization adaptatively. read_kernel (bool): If 'X' is a kernel and not an array of representations. n_rep (int): The number of repetition for each point (using random sampling). save (bool): Wheather to save intermediate LCs (.npy). diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 14650026..67e35d9a 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -14,7 +14,7 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, test_size=defaults.test_size, idx_test=None, idx_train=None, sparse=None, random_state=defaults.random_state, return_pred=False, return_alpha=False): - """ Perform prediction on the test set using the full training set. 
+ """Perform prediction on the test set using the full training set. Args: X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. @@ -37,6 +37,9 @@ def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, np.1darray(Ntest) : prediction absolute errors on the test set np.1darray(Ntest) : (if return_pred is True) predictions on the test set np.1darray(Ntrain or sparse) : (if return_alpha is True) regression weights + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ idx_train, idx_test, y_train, y_test = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index 8ccfe052..38bf9494 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -12,7 +12,7 @@ def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): - """Computes global kernel matrix between two sets of molecular representations. + """Compute global kernel matrix between two sets of molecular representations. Args: X (list): List of molecular representations (first set). @@ -69,7 +69,7 @@ def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None): - """Computes the covariance matrix between two molecules using local kernels. + """Compute the covariance matrix between two molecules using local kernels. Args: mol1 (dict): First molecule represented as dictionary of atomic environments by species. @@ -97,7 +97,7 @@ def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None) def normalize_kernel(kernel, self_x=None, self_y=None, verbose=0): - """Normalizes a kernel matrix using self-kernel values. + """Normalize a kernel matrix using self-kernel values. Args: kernel (numpy ndarray): Kernel matrix to normalize. 
@@ -118,7 +118,7 @@ def normalize_kernel(kernel, self_x=None, self_y=None, verbose=0): def mol_to_dict(mol, species): - """Converts molecular representation to a dictionary organized by atomic species. + """Convert molecular representation to a dictionary organized by atomic species. Args: mol (numpy ndarray): Molecular representation where each row is [atomic_number, features...]. @@ -136,7 +136,7 @@ def mol_to_dict(mol, species): def sumsq(x): - """Computes sum of squares (dot product with itself). + """Compute sum of squares (dot product with itself). Args: x (numpy ndarray): Input vector. @@ -148,7 +148,7 @@ def sumsq(x): def avg_kernel(kernel, _options): - """Computes the average kernel value. + """Compute the average kernel value. Args: kernel (numpy ndarray): Kernel matrix. @@ -161,7 +161,7 @@ def avg_kernel(kernel, _options): def rematch_kernel(kernel, options): - """Computes the REMatch (Regularized Entropy Match) kernel. + """Compute the REMatch (Regularized Entropy Match) kernel. Uses Sinkhorn algorithm to compute optimal transport-based kernel similarity. diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index e679a9a1..2e682c9e 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -14,7 +14,7 @@ def hyperparameters(X, y, sigma=defaults.sigmaarr, eta=defaults.etaarr, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, splits=defaults.splits, idx_test=None, idx_train=None, printlevel=0, adaptive=False, read_kernel=False, sparse=None, random_state=defaults.random_state): - """ Performs a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). + """Perform a Kfold cross-validated hyperparameter optimization (for width of kernel and regularization parameter). Args: X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. 
@@ -29,7 +29,7 @@ def hyperparameters(X, y, idx_test (numpy.1darray): List of indices for the test set (based on the sequence in X). idx_train (numpy.1darray): List of indices for the training set (based on the sequence in X). printlevel (int): Controls level of output printing. - adaptative (bool): To expand the grid search adaptatively. + adaptive (bool): To expand the grid search adaptively. read_kernel (bool): If 'X' is a kernel and not an array of representations. sparse (int): The number of reference environnments to consider for sparse regression. random_state (int): The seed used for random number generator (controls train/test splitting). @@ -38,6 +38,9 @@ def hyperparameters(X, y, The results of the grid search as a numpy.2darray [Cx(MAE,std,eta,sigma)], where C is the number of parameter set and the array is sorted according to MAEs (last is minimum) + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ def k_fold_opt(K_all, eta): kfold = KFold(n_splits=splits, shuffle=False) diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index 7f83088c..af91346b 100644 --- a/qstack/regression/kernel.py +++ b/qstack/regression/kernel.py @@ -8,7 +8,7 @@ def kernel(X, Y=None, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict): - """ Computes a kernel between sets A and B (or A and A) using their representations. + """Compute a kernel between sets A and B (or A and A) using their representations. Args: X (numpy.ndarray): Representation of A. diff --git a/qstack/regression/kernel_utils.py b/qstack/regression/kernel_utils.py index b82bf32e..1fbaf601 100644 --- a/qstack/regression/kernel_utils.py +++ b/qstack/regression/kernel_utils.py @@ -1,7 +1,11 @@ -"""Kernel computation utility functions and defaults.""" +"""Kernel computation utility functions and defaults. + +Provides: + REGMODULE_PATH: Path to the module. + defaults: Default parameters.
+""" import os -import argparse import warnings from types import SimpleNamespace import numpy as np @@ -13,20 +17,6 @@ REGMODULE_PATH = os.path.dirname(__file__) -class ParseKwargs(argparse.Action): - def __call__(self, _parser, namespace, values, _option_string=None): - setattr(namespace, self.dest, defaults.gdict) - for value in values: - key, value = value.split('=') - for t in [int, float]: - try: - value = t(value) - break - except ValueError: - continue - getattr(namespace, self.dest)[key] = value - - defaults = SimpleNamespace( sigma=32.0, eta=1e-5, @@ -45,7 +35,7 @@ def __call__(self, _parser, namespace, values, _option_string=None): def get_local_kernel(arg): - """Obtains a local-environment kernel function by name. + """Obtain a local-environment kernel function by name. Args: arg (str): Kernel name. Available options include: @@ -72,7 +62,7 @@ def get_local_kernel(arg): def get_global_kernel(arg, local_kernel): - """Creates a global kernel function from a local kernel. + """Create a global kernel function from a local kernel. Args: arg (tuple): Tuple of (gkernel_name, options_dict). @@ -93,7 +83,7 @@ def get_global_kernel(arg, local_kernel): def get_kernel(arg, arg2=None): - """Returns the appropriate kernel function based on arguments. + """Return the appropriate kernel function based on arguments. Args: arg (str): Local kernel name. @@ -112,7 +102,7 @@ def get_kernel(arg, arg2=None): def train_test_split_idx(y, idx_test=None, idx_train=None, test_size=defaults.test_size, random_state=defaults.random_state): - """ Perfrom test/train data split based on random shuffling or given indices. + """Perform test/train data split based on random shuffling or given indices. If neither `idx_test` nor `idx_train` are specified, the splitting is done randomly using `random_state`.
@@ -134,6 +124,9 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, numpy.1darray(Ntrain, dtype=int) : train indices numpy.1darray(Ntest, dtype=float) : test set target property numpy.1darray(Ntrain, dtype=float) : train set target property + + Raises: + RuntimeError: If test indices are repeated. """ if idx_test is None and idx_train is None: idx_train, idx_test = train_test_split(np.arange(len(y)), test_size=test_size, random_state=random_state) @@ -159,7 +152,7 @@ def train_test_split_idx(y, idx_test=None, idx_train=None, def sparse_regression_kernel(K_train, y_train, sparse_idx, eta): - r""" Compute the sparse regression matrix and vector. + r"""Compute the sparse regression matrix and vector. Solution of a sparse regression problem is $$ \vec w = \left( \mathbf{K}_{MN} \mathbf{K}_{NM} + \eta \mathbf{1} \right) ^{-1} \mathbf{K}_{MN}\vec y $$ diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index 428667a4..bb076b2f 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -14,7 +14,7 @@ def custom_laplacian_kernel(X, Y, gamma): - """Computes Laplacian kernel between X and Y using Python implementation. + """Compute Laplacian kernel between X and Y using Python implementation. K(x, y) = exp(-gamma * ||x - y||_1) @@ -45,7 +45,7 @@ def cdist(X, Y): def custom_C_kernels(kernel_function, return_distance_function=False): - """Creates kernel function wrappers using C implementation for speed. + """Create kernel function wrappers using C implementation for speed. Args: kernel_function (str): Kernel type ('L' for Laplacian, 'G' for Gaussian). @@ -99,7 +99,7 @@ def kernel_func_c(X, Y, gamma): def dot_kernel_wrapper(x, y, *_kargs, **_kwargs): - """Wrapper for linear (dot product) kernel. + """Compute linear (dot product) kernel. Args: x (numpy ndarray): First set of samples. 
@@ -114,7 +114,7 @@ def dot_kernel_wrapper(x, y, *_kargs, **_kwargs): def cosine_similarity_wrapper(x, y, *_kargs, **_kwargs): - """Wrapper for cosine similarity kernel. + """Compute cosine similarity kernel. Args: x (numpy ndarray): First set of samples. @@ -129,8 +129,9 @@ def cosine_similarity_wrapper(x, y, *_kargs, **_kwargs): def local_laplacian_kernel_wrapper(X, Y, gamma): - """ Wrapper that acts as a generic Laplacian kernel function. - It decides which kernel implementation to call. + """Decide which kernel implementation to call. + + Wrapper that acts as a generic Laplacian kernel function. Args: X (numpy ndarray): First set of samples (can be multi-dimensional). diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 43694fa0..3f64ed00 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -12,7 +12,7 @@ def oos(X, X_oos, alpha, sigma=defaults.sigma, akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict, test_size=defaults.test_size, idx_test=None, idx_train=None, sparse=None, random_state=defaults.random_state): - """ Perform prediction on an out-of-sample (OOS) set. + """Perform prediction on an out-of-sample (OOS) set. Args: X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. diff --git a/qstack/regression/parser.py b/qstack/regression/parser.py index 71f72be8..d807eab3 100644 --- a/qstack/regression/parser.py +++ b/qstack/regression/parser.py @@ -2,7 +2,30 @@ import argparse from qstack.tools import FlexParser -from .kernel_utils import defaults, ParseKwargs, local_kernels_dict, global_kernels_dict +from .kernel_utils import defaults, local_kernels_dict, global_kernels_dict + + +class ParseKwargs(argparse.Action): + """Parser for the global kernel parameters.""" + def __call__(self, _parser, namespace, values, _option_string=None): + """Set attributes. + + Args: + _parser: Unused (for interface compatibility). 
+ namespace (argparse.Namespace): Namespace to set attributes to. + values (list[str]): The associated command-line arguments. + _option_string: Unused (for interface compatibility). + """ + setattr(namespace, self.dest, defaults.gdict) + for value in values: + key, value = value.split('=') + for t in [int, float]: + try: + value = t(value) + break + except ValueError: + continue + getattr(namespace, self.dest)[key] = value class RegressionParser(FlexParser): @@ -65,7 +88,7 @@ def __init__(self, hyperparameters_set=None, **kwargs): "L_custompy" is suited to open-shell systems') parser.add_argument('--gkernel', type=str, dest='gkernel', default=defaults.gkernel, choices=global_kernels_dict.keys(), help='global kernel type: "avg" for average, "rem" for REMatch') - parser.add_argument('--gdict', action=ParseKwargs, dest='gdict', default=defaults.gdict, nargs='*', help='dictionary like input string to initialize global kernel parameters') + parser.add_argument('--gdict', action=ParseKwargs, dest='gdict', default=defaults.gdict, nargs='*', help='dictionary like input string to initialize global kernel parameters, e.g. 
"--gdict alpha=2 normalize=0"') parser.add_argument('--test', type=float, dest='test_size', default=defaults.test_size, help='test set fraction') parser.add_argument('--train', type=float, dest='train_size', default=defaults.train_size, nargs='+', help='training set fractions') parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads') diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index 66460359..b38e71b6 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -13,7 +13,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, test_size=defaults.test_size, train_size=defaults.train_size, n_rep=defaults.n_rep, random_state=defaults.random_state, idx_test=None, idx_train=None, sparse=None, debug=False, save_pred=False): - """ Produces learning curves (LC) data, for various training sizes, using kernel ridge regression and the user specified parameters + """Produce learning curves (LC) data using kernel ridge regression. Args: X (numpy.ndarray[Nsamples,...]): Array containing the representations of all Nsamples. @@ -37,6 +37,9 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, Returns: The computed LC, as a list containing all its points (train size, MAE, std) If save_pres is True, a tuple with (results, (target values, predicted values)) + + Raises: + RuntimeError: If 'X' is a kernel and sparse regression is chosen. """ idx_train, idx_test, y_train, y_test = train_test_split_idx(y=y, idx_test=idx_test, idx_train=idx_train, test_size=test_size, random_state=random_state) diff --git a/qstack/reorder.py b/qstack/reorder.py index f3ca5f4a..8c1e3fe8 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -39,7 +39,8 @@ def _orca2gpr_idx(l_slices, m): Additionally, Orca uses a different sign convention for |m|>=3. Args: - l (np.ndarray): Array of angular momentum quantum numbers per shell. 
+ l_slices (iterator): Iterator that yields (l: int, s: slice) per shell, where + l is angular momentum quantum number and s is the corresponding slice of size 2*l+1. m (np.ndarray): Array of magnetic quantum numbers per AO. Returns: diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index 93e05814..78f5218b 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -24,7 +24,7 @@ def __init__(self, fname=None, parameters='HF'): def renormalize(self, a): - r"""Computes renormalization factor for Gaussian basis functions. + r"""Compute renormalization factor for Gaussian basis functions. The auxiliary basis functions are given in charge normalization, thus we need to renormalize them to square-integral normalization for use in integrals. @@ -44,7 +44,7 @@ def renormalize(self, a): def read_ac(self, fname): - """Reads auxiliary basis parameters from file. + """Read auxiliary basis parameters from file. Args: fname (str, optional): Path to parameter file. If None, uses default. @@ -72,13 +72,13 @@ def read_ac(self, fname): def add_caps(self, basis): - """Adds cap (diffuse) functions to the auxiliary basis. + """Add cap (diffuse) functions to the auxiliary basis. Args: basis (dict): Basis set dictionary to modify in-place. - Returns: - None. Modifies basis in-place. + Output: + Modifies basis in-place. """ for q in range(1, self.Qmax+1): a = self._caps_array[q] @@ -89,7 +89,7 @@ def add_caps(self, basis): def get_basis(self, fname, parameters): - """Initializes auxiliary basis set from file or predefined parameters. + """Initialize auxiliary basis set from file or predefined parameters. Loads basis set from either predefined HF/HFS parameters or custom file, then adds cap functions and stores in self.acbasis. @@ -114,7 +114,7 @@ def get_basis(self, fname, parameters): def use_charge(self, mol): - """Adjusts basis coefficients based on molecular charge. + """Adjust basis coefficients based on molecular charge.
For charged molecules with HF parameters, scales the cap function coefficient to account for charge redistribution. @@ -134,7 +134,7 @@ def use_charge(self, mol): def use_ecp(self, mol, acbasis): - """Adjusts basis set to account for effective core potentials (ECP). + """Adjust basis set to account for effective core potentials (ECP). When ECP is present, removes basis functions corresponding to core electrons by reducing coefficients proportionally until core charge is accounted for. @@ -178,7 +178,7 @@ def use_ecp(self, mol, acbasis): def get_auxweights(self, auxmol): - """Extracts auxiliary basis weights from the basis. + """Extract auxiliary basis weights from the basis. Collects the coefficients from each auxiliary basis primitive into a single array aligned with auxiliary orbital indices. @@ -216,7 +216,7 @@ def merge_caps(self, w, eri3c): def get_eri3c(self, mol, auxmol): - """Computes 3-center electron repulsion integrals. + """Compute 3-center electron repulsion integrals. Args: mol (pyscf.gto.Mole): Main molecule object. @@ -233,7 +233,7 @@ def get_eri3c(self, mol, auxmol): def check_coefficients(self, mol, acbasis): - """Validates that auxiliary basis coefficients sum to correct total charge. + """Validate that auxiliary basis coefficients sum to correct total charge. Ensures basis set modifications (charge adjustment, ECP) maintain consistency with molecular electronic structure. @@ -252,7 +252,7 @@ def check_coefficients(self, mol, acbasis): def HLB20(self, mol): - """Computes the LB2020 effective potential matrix. + """Compute the LB2020 effective potential matrix. Args: mol (pyscf.gto.Mole): Molecule object. @@ -270,7 +270,7 @@ def HLB20(self, mol): def Heff(self, mol): - """Constructs one-electron Hamiltonian for initial guess. + """Construct one-electron Hamiltonian for initial guess. Combines standard core Hamiltonian with LB2020 effective potential. 
@@ -287,7 +287,7 @@ def Heff(self, mol): def HLB20_ints_generator(self, mol, auxmol): - """Creates generator for LB2020 potential gradients. + """Create generator for LB2020 potential gradients. Computes derivative integrals and returns a function that evaluates the gradient of LB2020 potential with respect to atomic positions. @@ -318,7 +318,7 @@ def HLB20_ints_deriv(iat): def HLB20_generator(self, mol): - """Creates generator for LB2020 potential gradient contributions. + """Create generator for LB2020 potential gradient contributions. Args: mol (pyscf.gto.Mole): Molecule object. @@ -339,7 +339,8 @@ def HLB20_deriv(iat): def init_data(self): - """Set parameters: + """Set parameters. + - self._caps_array: Diffuse function exponents for each element. - self._hf_basis: Predefined HF parameter set for all elements. - self._hfs_basis: Predefined HFS parameter set for all elements. diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 5a14b804..998d3fe4 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -6,7 +6,7 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): - """Computes MO energies and vectors using an initial guess Hamiltonian. + """Compute MO energies and vectors using an initial guess Hamiltonian. Args: mol (pyscf Mole): pyscf Mole object. @@ -47,7 +47,7 @@ def get_guess_orbitals(mol, guess, xc="pbe", field=None, return_ao_dip=False): def ext_field_generator(mol, field): - """Generates external electric field Hamiltonian gradient function. + """Generate external electric field Hamiltonian gradient function. Creates a function that computes derivatives of the external field interaction Hamiltonian (H_ext) with respect to nuclear coordinates for each atom. @@ -79,7 +79,7 @@ def field_deriv(iat): def get_guess_orbitals_grad(mol, guess, field=None): - """Computes guess Hamiltonian eigenvalues and their nuclear/field gradients. 
+ """Compute guess Hamiltonian eigenvalues and their nuclear/field gradients. Calculates orbital energies and their derivatives with respect to both nuclear coordinates (for geometry optimization/force calculations) and electric field @@ -114,7 +114,7 @@ def get_guess_orbitals_grad(mol, guess, field=None): def get_spahm_representation(mol, guess_in, xc="pbe", field=None): - """Computes the ε-SPAHM molecular representation. + """Compute the ε-SPAHM molecular representation. Reference: A. Fabrizio, K. R. Briling, C. Corminboeuf, @@ -140,7 +140,7 @@ def get_spahm_representation(mol, guess_in, xc="pbe", field=None): def get_spahm_representation_grad(mol, guess_in, field=None): - """Computes SPAHM representation and its nuclear/field gradients for force/response calculations. + """Compute SPAHM representation and its nuclear/field gradients for force/response calculations. Calculates the SPAHM descriptor (occupied orbital energies) along with derivatives needed for molecular dynamics, geometry optimization, and response properties. diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index c7441bd0..cfbee972 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -3,7 +3,7 @@ Implements various guess methods: Hcore, Hückel, GWH, SAD, SAP, LB2020. Provides: - - guesses_dict: Dictionary mapping guess names to functions. + guesses_dict: Dictionary mapping guess names to functions. """ import warnings @@ -14,7 +14,7 @@ def hcore(mol, *_): - """Computes guess Hamiltonian from core contributions (kinetic + nuclear + ECP). + """Compute guess Hamiltonian from core contributions (kinetic + nuclear + ECP). Args: mol (pyscf Mole): pyscf Mole object. @@ -27,7 +27,7 @@ def hcore(mol, *_): def GWH(mol, *_): - """Computes guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. + """Compute guess Hamiltonian using Generalized Wolfsberg-Helmholtz (GWH) method. Uses the formula: H_ij = 0.5 * K * (H_ii + H_jj) * S_ij with K = 1.75. 
@@ -52,7 +52,7 @@ def GWH(mol, *_): def SAD(mol, xc): - """Computes guess Hamiltonian using Superposition of Atomic Densities (SAD). + """Compute guess Hamiltonian using Superposition of Atomic Densities (SAD). Constructs the Fock matrix from atomic Hartree-Fock density matrices summed together as an initial guess for molecular calculations. @@ -83,7 +83,7 @@ def SAD(mol, xc): def SAP(mol, *_): - """Computes guess Hamiltonian using Superposition of Atomic Potentials (SAP). + """Compute guess Hamiltonian using Superposition of Atomic Potentials (SAP). Constructs initial Hamiltonian from kinetic energy plus summed atomic potentials. @@ -102,7 +102,7 @@ def SAP(mol, *_): def LB(mol, *_): - """Computes guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. + """Compute guess Hamiltonian using Laikov-Briling 2020 model with HF parameters. Args: mol (pyscf Mole): pyscf Mole object. @@ -115,7 +115,7 @@ def LB(mol, *_): def LB_HFS(mol, *_): - """Computes guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. + """Compute guess Hamiltonian using Laikov-Briling 2020 model with HFS parameters. Args: mol (pyscf Mole): pyscf Mole object. @@ -144,7 +144,7 @@ def solveF(mol, fock): def get_guess(arg): - """Returns guess Hamiltonian function by name. + """Return guess Hamiltonian function by name. Args: arg (str): Guess method name. Available options: @@ -169,7 +169,7 @@ def get_guess(arg): def check_nelec(nelec, nao): - """Validates that the number of electrons can be accommodated by available orbitals. + """Validate that the number of electrons can be accommodated by available orbitals. Args: nelec (tuple or int): Number of electrons (alpha, beta) or total. @@ -189,7 +189,7 @@ def check_nelec(nelec, nao): def get_occ(e, nelec, spin): - """Extracts occupied orbital eigenvalues/energies. + """Extract occupied orbital eigenvalues/energies. 
Args: e (numpy ndarray): Full array of orbital eigenvalues (1D) @@ -215,7 +215,7 @@ def get_occ(e, nelec, spin): def get_dm(v, nelec, spin): - """Constructs density matrix from occupied molecular orbitals. + """Construct density matrix from occupied molecular orbitals. Args: v (numpy ndarray): 2D array of MO coefficients (eigenvectors), columns are MOs. @@ -240,7 +240,7 @@ def get_dm(v, nelec, spin): def hcore_grad(mf): - """Returns core Hamiltonian gradient generator function. + """Return core Hamiltonian gradient generator function. Args: mf: PySCF mean-field object. @@ -252,7 +252,7 @@ def hcore_grad(mf): def LB_grad(mf): - """Returns Laikov-Briling Hamiltonian gradient generator function. + """Return Laikov-Briling Hamiltonian gradient generator function. Combines core Hamiltonian gradient with LB2020 model gradient. @@ -270,7 +270,7 @@ def H_grad(iat): def get_guess_g(arg): - """Returns both guess Hamiltonian function and its gradient generator. + """Return both guess Hamiltonian function and its gradient generator. Args: arg (str): Guess method name. Available: 'core', 'lb'. @@ -289,7 +289,7 @@ def get_guess_g(arg): def eigenvalue_grad(mol, e, c, s1, h1): - """Computes nuclear gradients of orbital eigenvalues from generalized eigenvalue problem HC = eSC. + """Compute nuclear gradients of orbital eigenvalues from generalized eigenvalue problem HC = eSC. Uses the Hellmann-Feynman theorem for eigenvalue derivatives. diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index b77f38ff..e9c7522a 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -6,7 +6,7 @@ def c_split(mol, c): - """Splits coefficient vector by angular momentum quantum number for each atom. + """Split coefficient vector by angular momentum quantum number for each atom. Organizes expansion coefficients into sublists grouped by angular momentum (l) for each atomic basis function. 
@@ -29,7 +29,7 @@ def c_split(mol, c): def rotate_c(D, cs): - """Rotates coefficient vector using real Wigner D-matrices. + """Rotate coefficient vector using real Wigner D-matrices. Applies angular momentum rotation to each angular momentum block separately. @@ -44,7 +44,7 @@ def rotate_c(D, cs): def new_xy_axis(z): - """Constructs orthonormal coordinate system from a given z-axis. + """Construct orthonormal coordinate system from a given z-axis. Finds optimal x' and y' axes that form a right-handed orthonormal system with the given z' direction. The algorithm chooses x' to have maximal @@ -67,7 +67,7 @@ def new_xy_axis(z): def Dmatrix(xyz, lmax, order='xyz'): - """Generates real Wigner D-matrices for spatial rotation of spherical harmonics. + """Generate real Wigner D-matrices for spatial rotation of spherical harmonics. Computes rotation matrices D^l for angular momenta l = 0 to lmax, where D^l[m1, m2] transforms spherical harmonics under the specified rotation. @@ -301,7 +301,7 @@ def Dmatrix(xyz, lmax, order='xyz'): def Dmatrix_for_z(z, lmax, order='xyz'): - """Generates Wigner D-matrices for rotation that aligns z-axis with given vector. + """Generate Wigner D-matrices for rotation that aligns z-axis with given vector. Wrapper function that combines new_xy_axis() and Dmatrix() to compute rotation matrices for a rotation defined only by the target z-direction. @@ -313,9 +313,6 @@ def Dmatrix_for_z(z, lmax, order='xyz'): Returns: list: List of Wigner D-matrices for l=0 to lmax. - - Raises: - NotImplementedError: If lmax > 4. """ return Dmatrix(new_xy_axis(z), lmax, order) diff --git a/qstack/spahm/rho/atom.py b/qstack/spahm/rho/atom.py index 291d4cb2..0ba3bfa4 100644 --- a/qstack/spahm/rho/atom.py +++ b/qstack/spahm/rho/atom.py @@ -15,8 +15,8 @@ def main(args=None): Args: args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. - Returns: - None: Saves representation to numpy file specified by --name argument. 
+ Output: + Saves representation to numpy file specified by --name argument. """ parser = SpahmParser(description='This program computes the SPAHM(a) representation for a given molecular system', atom=True) parser.add_argument('--mol', dest='mol', required=True, type=str, help="the path to the xyz file with the molecular structure") diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index 78f0b97b..1208ba55 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -6,7 +6,7 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): - """Creates atomic density representations using Löwdin partitioning and density fitting. + """Create atomic density representations using Löwdin partitioning and density fitting. Decomposes the molecular density matrix into atomic contributions using Löwdin orthogonalization, then fits each atomic density onto auxiliary basis set. diff --git a/qstack/spahm/rho/bond.py b/qstack/spahm/rho/bond.py index 0eaeba04..c745458c 100644 --- a/qstack/spahm/rho/bond.py +++ b/qstack/spahm/rho/bond.py @@ -17,8 +17,8 @@ def main(args=None): Args: args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. - Returns: - None: Saves representations to numpy files with names based on --name argument. + Output: + Saves representations to numpy files with names based on --name argument. 
""" parser = SpahmParser(description='This program computes the SPAHM(b) representation for a given molecular system or a list of thereof', unified=True, bond=True) args = parser.parse_args(args=args) diff --git a/qstack/spahm/rho/bond_selected.py b/qstack/spahm/rho/bond_selected.py index dca2640b..90b30e84 100644 --- a/qstack/spahm/rho/bond_selected.py +++ b/qstack/spahm/rho/bond_selected.py @@ -12,7 +12,7 @@ def get_spahm_b_selected(mols, bondidx, xyzlist, readdm=None, guess=defaults.guess, xc=defaults.xc, spin=None, cutoff=defaults.cutoff, printlevel=0, omods=defaults.omod, bpath=defaults.bpath, only_m0=False, same_basis=False): - """Computes SPAHM(b) representations for specific bonds in molecules. + """Compute SPAHM(b) representations for specific bonds in molecules. Generates bond-centered representations for user-specified atom pairs across a dataset of molecules, useful for targeted bond analysis. @@ -69,8 +69,8 @@ def main(): Args: None: Parses command-line arguments. - Returns: - None: Saves bond representations to numpy files in specified directory. + Output: + Saves bond representations to numpy files in specified directory. """ parser = SpahmParser(description='This program computes the SPAHM(b) representation for a list of bonds', bond=True) parser.remove_argument('elements') diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 10619c94..627baa45 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -15,7 +15,7 @@ def spahm_a_b(rep_type, mols, dms, elements=None, only_m0=False, zeros=False, printlevel=0, auxbasis=defaults.auxbasis, model=defaults.model, pairfile=None, dump_and_exit=False, same_basis=False, only_z=None): - """Computes SPAHM(a) or SPAHM(b) representations for a set of molecules. + """Compute SPAHM(a) or SPAHM(b) representations for a set of molecules. Reference: K. R. Briling, Y. Calvino Alonso, A. Fabrizio, C. 
Corminboeuf, @@ -98,7 +98,7 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= elements=None, only_m0=False, zeros=False, split=False, printlevel=0, auxbasis=defaults.auxbasis, model=defaults.model, with_symbols=False, only_z=None, merge=True): - """Computes and reshapes SPAHM(a) or SPAHM(b) representations with flexible output formats. + """Compute and reshape SPAHM(a) or SPAHM(b) representations with flexible output formats. High-level interface that handles density matrix computation, representation generation, and output formatting including splitting, symbol labeling, and merging options. @@ -221,8 +221,8 @@ def main(args=None): Args: args (list, optional): Command-line arguments. If None, uses sys.argv. Defaults to None. - Returns: - None: Saves representations to numpy files based on --name argument and options. + Output: + Saves representations to numpy files based on --name argument and options. """ parser = SpahmParser(description='This program computes the SPAHM(a,b) representations for a given molecular system or a list thereof', unified=True, atom=True, bond=True) parser.add_argument('--rep', dest='rep', type=str, choices=['atom', 'bond'], required=True, help='the type of representation') diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index c0631288..7ff7360d 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -3,7 +3,7 @@ Implements various models: pure, SAD-diff, occupation-corrected, Löwdin partitioning. Provides: - - models_dict: Dictionary of available models. + models_dict: Dictionary of available models. """ import numpy as np @@ -14,7 +14,7 @@ def get_basis_info(atom_types, auxbasis): - """Gathers auxiliary basis information for all atom types. + """Gather auxiliary basis information for all atom types. Computes overlap matrices, basis function indices, and metric matrices needed for atomic density fitting.
@@ -43,7 +43,7 @@ def get_basis_info(atom_types, auxbasis): def _make_models_dict(): - """Creates dictionary of available SPAHM(a) models. + """Create a dictionary of available SPAHM(a) models. Defines density fitting functions for each model. @@ -114,7 +114,7 @@ def maxlen_MR2021(idx, elements): def get_model(arg): - """Returns density fitting and symmetrization functions for specified model. + """Return density fitting and symmetrization functions for specified model. Args: arg (str): Model name. Available options: @@ -130,14 +130,18 @@ def get_model(arg): Returns: tuple: (density_fitting_function, symmetrization_function, maxlen_function). - density_fitting_function (callable): Function performing density fitting. + Args: mol (pyscf Mole): Molecule object. dm (numpy ndarray): Density matrix (2D). auxbasis (str or dict): Auxiliary basis set. only_i (list[int]): List of atom indices to use. + Returns: list: Density fitting coefficients per atom (1D numpy ndarrays). + - symmetrization_function (callable): Function for symmetrizing coefficients. + Args: maxlen (int): Maximum feature length. c (numpy ndarray): Density fitting coefficients (1D). @@ -147,12 +151,16 @@ def get_model(arg): ao_len (dict): Basis set sizes per element. M (dict): Metric matrices per element (2D numpy ndarrays). only_i (list[int]): List of atom indices to use. + Returns: numpy ndarray: Symmetrized atomic feature vectors. + - maxlen_function (callable): Function computing max. feature size. + Args: idx (dict): Pair indices per element. elements (list[str]): Elements for which representation is computed. + Returns: int: Maximum feature length. @@ -166,7 +174,7 @@ def get_model(arg): def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, _, _M, only_i): - """Symmetrizes density fitting coefficients using MR2021 method. + """Symmetrize density fitting coefficients using MR2021 method. Reference: J. T. Margraf, K. 
Reuter, @@ -196,7 +204,7 @@ def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, _, _M, only_i): def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, _, M, only_i): - """Symmetrizes coefficients for each atom. + """Symmetrize coefficients for each atom. For each atom, use contributions from the said atom. @@ -222,7 +230,7 @@ def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, _, M, only_i): def coefficients_symmetrize_long(maxlen, c_df, atoms, idx, ao, ao_len, M, _): - """Symmetrizes coefficients for long Löwdin models. + """Symmetrize coefficients for long Löwdin models. For each atom, use contributions from the said atom as well as all other atoms. diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index d95d8c06..a47e66e6 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -10,7 +10,7 @@ def make_bname(q0, q1): - """Creates canonical bond name from two element symbols. + """Create canonical bond name from two element symbols. Args: q0 (str): First element symbol. @@ -23,7 +23,7 @@ def make_bname(q0, q1): def get_basis_info(qqs, mybasis, only_m0, printlevel): - """Computes basis indices and metric matrices for bond pairs. + """Compute basis indices and metric matrices for bond pairs. Args: qqs (list): List of bond pair names (e.g., ['CC', 'CH', 'OH']). @@ -51,7 +51,7 @@ def get_basis_info(qqs, mybasis, only_m0, printlevel): def read_df_basis(bnames, bpath, same_basis=False): - """Loads bond-optimized basis sets from .bas files. + """Load bond-optimized basis sets from .bas files. Args: bnames (list): List of bond pair names (e.g., ['CC', 'CH']). @@ -72,7 +72,7 @@ def read_df_basis(bnames, bpath, same_basis=False): def get_element_pairs(elements): - """Generates all possible element pair combinations. + """Generate all possible element pair combinations. Creates complete list of bond types assuming all elements can bond with each other. 
@@ -98,7 +98,7 @@ def get_element_pairs(elements): def get_element_pairs_cutoff(elements, mols, cutoff, align=False): - """Determines element pairs based on actual distances in molecules. + """Determine element pairs based on actual distances in molecules. Identifies which element pairs actually form bonds within the distance cutoff by scanning molecular geometries. @@ -145,7 +145,7 @@ def get_element_pairs_cutoff(elements, mols, cutoff, align=False): def read_basis_wrapper_pairs(mols, bondidx, bpath, only_m0, printlevel, same_basis=False): - """Reads basis sets and computes metric matrices for specified bond pairs. + """Read basis sets and compute metric matrices for specified bond pairs. Processes bond pairs from molecular structures and loads their corresponding basis sets from disk, then computes basis indices and metric matrices. @@ -174,7 +174,7 @@ def read_basis_wrapper_pairs(mols, bondidx, bpath, only_m0, printlevel, same_bas def read_basis_wrapper(mols, bpath, only_m0, printlevel, cutoff=None, elements=None, pairfile=None, dump_and_exit=False, same_basis=False): - """Reads basis sets for all element pairs present in molecules. + """Read basis sets for all element pairs present in molecules. Determines which element pairs exist (either all possible or within cutoff distance), loads corresponding basis sets, and computes metric matrices and indices. @@ -226,7 +226,7 @@ def read_basis_wrapper(mols, bpath, only_m0, printlevel, cutoff=None, elements=N def bonds_dict_init(qqs, M): - """Initializes storage for bond representations. + """Initialize storage for bond representations. Creates a dictionary with zero-initialized arrays for each bond type, with array sizes matching the corresponding metric matrix dimensions. @@ -250,7 +250,7 @@ def bonds_dict_init(qqs, M): def fit_dm(dm, mol, mybasis, ri0, ri1): - """Fits density matrix using auxiliary basis functions at bond center. + """Fit density matrix using auxiliary basis functions at bond center.
Decomposes the bond density matrix into auxiliary basis coefficients centered at the bond midpoint, then splits coefficients by angular momentum. @@ -263,7 +263,8 @@ def fit_dm(dm, mol, mybasis, ri0, ri1): ri1 (numpy.ndarray): Coordinates of second atom in Å. Returns: - list: Coefficients split by angular momentum quantum number [(l, coeff), ...].""" + list: Coefficients split by angular momentum quantum number [(l, coeff), ...]. + """ rm = (ri0+ri1)*0.5 atom = f"No {rm[0]} {rm[1]} {rm[2]}" auxmol = gto.M(atom=atom, basis=mybasis) @@ -274,7 +275,7 @@ def fit_dm(dm, mol, mybasis, ri0, ri1): def vec_from_cs(z, cs, lmax, idx): - """Rotates basis coefficients to bond axis and creates vectorized representation. + """Rotate basis coefficients to bond axis and create vectorized representation. Applies Wigner D-matrix rotation to align coefficients with the bond vector (same as pretending the bond is along the z-axis), @@ -296,7 +297,7 @@ def vec_from_cs(z, cs, lmax, idx): def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): - """Computes bond representation for a specific atom pair. + """Compute bond representation for a specific atom pair. Extracts bond density, fits it with basis functions at the bond center, and symmetrizes the representation from both atom perspectives. @@ -333,7 +334,7 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): def repr_for_mol(mol, dm, qqs, M, mybasis, idx, maxlen, cutoff, only_z=None): - """Computes SPAHM(b) representations for all atoms in a molecule. + """Compute SPAHM(b) representations for all atoms in a molecule. Constructs bond-based atomic representations by summing contributions from all bonds of the same type within cutoff distance. diff --git a/qstack/spahm/rho/lowdin.py b/qstack/spahm/rho/lowdin.py index 0d761c61..23b4ff18 100644 --- a/qstack/spahm/rho/lowdin.py +++ b/qstack/spahm/rho/lowdin.py @@ -18,7 +18,7 @@ class Lowdin_split: dmL (numpy ndarray): Löwdin-orthogonalized density matrix.
""" def __init__(self, mol, dm): - """Initializes Löwdin split with molecule and density matrix. + """Initialize Löwdin split with molecule and density matrix. Args: mol (pyscf Mole): pyscf Mole object. @@ -34,7 +34,7 @@ def __init__(self, mol, dm): self.dmL = S12 @ dm @ S12 def sqrtm(self, m): - """Computes matrix square root and inverse square root via eigendecomposition. + """Compute matrix square root and inverse square root via eigendecomposition. Args: m (numpy ndarray): Symmetric positive-definite matrix. @@ -49,7 +49,7 @@ def sqrtm(self, m): return (sm+sm.T)*0.5, (sm1+sm1.T)*0.5 def get_bond(self, at1idx, at2idx): - """Extracts bond density matrix for an atom pair. + """Extract bond density matrix for an atom pair. Isolates the density matrix components corresponding to interactions between two atoms, transforming back to AO basis. diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index e42c84b0..14867533 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -7,7 +7,7 @@ def c_split_atom(mol, c, only_i=None): - """Splits coefficient vector by angular momentum quantum number for each atom. + """Split coefficient vector by angular momentum quantum number for each atom. Organizes expansion coefficients into sublists grouped by angular momentum (l) for each atomic basis function. @@ -28,7 +28,7 @@ def c_split_atom(mol, c, only_i=None): def idxl0(i, l, ao): - """Returns index of basis function with same L and N quantum numbers but M=0. + """Return index of basis function with same L and N quantum numbers but M=0. Finds the m=0 component of the same angular momentum shell. @@ -47,7 +47,7 @@ def idxl0(i, l, ao): def get_S(q, basis): - """Computes overlap matrix and angular momentum info for an atom. + """Compute overlap matrix and angular momentum info for an atom. Creates single-atom molecule and extracts basis function structure. 
@@ -75,7 +75,7 @@ def get_S(q, basis): def store_pair_indices(ao): - """Stores basis function pair indices with matching L and M quantum numbers. + """Store basis function pair indices with matching L and M quantum numbers. Creates list of all (i,j) pairs where basis functions have identical angular momenta. @@ -95,7 +95,7 @@ def store_pair_indices(ao): def store_pair_indices_short(ao, ao_start): - """Stores basis function pair indices for m=0 components only. + """Store basis function pair indices for m=0 components only. Creates list of (i,j) pairs using only the first basis function (m=0) of each angular momentum shell, for compact representation. @@ -119,7 +119,7 @@ def store_pair_indices_short(ao, ao_start): def metric_matrix(q, idx, ao, S): - """Computes metric matrix for symmetrization of density fitting coefficients. + """Compute metric matrix for symmetrization of density fitting coefficients. Constructs metric matrix from overlap integrals of basis function pairs, normalized by angular momentum degeneracy (2l+1). Returns square root @@ -151,7 +151,7 @@ def metric_matrix(q, idx, ao, S): def metric_matrix_short(idx, ao, S): - """Computes metric matrix for symmetrization of short-format coefficients. + """Compute metric matrix for symmetrization of short-format coefficients. Args: idx (numpy ndarray): [i, j] basis function pair indices. @@ -243,7 +243,7 @@ def vectorize_c_short(idx, ao, c): def store_pair_indices_z(ao): - """Stores basis function pairs with matching |m| quantum numbers. + """Store basis function pairs with matching |m| quantum numbers. Creates list of all (i,j) pairs where basis functions have equal absolute values of magnetic quantum number m. @@ -264,7 +264,7 @@ def store_pair_indices_z(ao): def store_pair_indices_z_only0(ao): - """Stores basis function pairs restricted to m=0 components only. + """Store basis function pairs restricted to m=0 components only. Creates list of all (i,j) pairs where both basis functions have m=0. 
@@ -272,7 +272,8 @@ def store_pair_indices_z_only0(ao): ao (dict): Angular momentum info with 'l' and 'm' keys. Returns: - numpy ndarray: [i, j] index pairs where both m_i = m_j = 0.""" + numpy ndarray: [i, j] index pairs where both m_i = m_j = 0. + """ idx = [] for i, mi in enumerate(ao['m']): if mi!=0: @@ -285,7 +286,7 @@ def store_pair_indices_z_only0(ao): def metric_matrix_z(idx, ao, S): - """Computes metric matrix for z-axis symmetric representations. + """Compute metric matrix for z-axis symmetric representations. Constructs metric matrix accounting for m and -m degeneracy. Matrix elements are nonzero only when angular momenta match and m quantum @@ -297,7 +298,8 @@ def metric_matrix_z(idx, ao, S): S (numpy ndarray): Overlap matrix. Returns: - numpy ndarray: Square root of metric matrix for z-symmetric normalization.""" + numpy ndarray: Square root of metric matrix for z-symmetric normalization. + """ N = len(idx) A = np.zeros((N,N)) for p in range(N): diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index e1b828e8..b47a4d64 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -29,7 +29,7 @@ def get_chsp(fname, n): - """Loads charge and spin information from file. + """Load charge and spin information from file. Reads a file containing charge/spin values, converting 'None' strings to None objects. @@ -61,7 +61,7 @@ def chsp_converter(chsp): def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, progress=False, srcdir=None): - """Loads molecules from XYZ files and creates pyscf Mole objects. + """Load molecules from XYZ files and create pyscf Mole objects. Args: xyzlist (list): List of XYZ filenames. @@ -97,7 +97,7 @@ def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None, printlevel=0): - """Computes or loads guess density matrices for a list of molecules.
+ """Compute or load guess density matrices for a list of molecules. Args: mols (list): List of pyscf Mole objects. @@ -153,7 +153,7 @@ def dm_open_mod(dm, omod): elif dm.ndim == 2: raise RuntimeError('Density matrix is closed-shell (2D) but omod is not None') if omod not in omod_fns_dict: - raise ValueError(f'unknown open-shell mode: must be in {list(omod_fns_dict.keys())}, None if the system is closed-shell') + raise NotImplementedError(f'unknown open-shell mode: must be in {list(omod_fns_dict.keys())}, None if the system is closed-shell') return omod_fns_dict[omod](dm) @@ -206,20 +206,23 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, file_format=None): """Load representations from disk. - Args: - f_in (str): Path to the input file. - from_list (bool): If the input file is a text file containing a list of paths to the representations. - srcdir (str) : The path prefix to be at the begining of each file in `f_in`. Defaults to current working directory. - with_labels (bool): If return atom type labes along with the representations. - local (bool): If the representations are local (per-atom) or global (per-molecule). - sum_local (bool): Sums the local components into a global representation, only if local=True. - printlevel (int): Verbosity level. - progress (bool): If shows a progress bar. - file_format (dict): Structure of the input data, with keys=('is_labeled;, 'is_single'). - Defaults to structure auto determination (for "experienced users" only). - - Returns: - np.array with shape (N_representations, N_features), or a tuple containing a list of atomic labels and said np.array. + Args: + f_in (str): Path to the input file. + from_list (bool): If the input file is a text file containing a list of paths to the representations. + srcdir (str) : The path prefix to be at the beginning of each file in `f_in`. Defaults to current working directory. + with_labels (bool): If return atom type labels along with the representations.
+ local (bool): If the representations are local (per-atom) or global (per-molecule). + sum_local (bool): Sums the local components into a global representation, only if local=True. + printlevel (int): Verbosity level. + progress (bool): If shows a progress bar. + file_format (dict): Structure of the input data, with keys=('is_labeled', 'is_single'). + Defaults to structure auto determination (for "experienced users" only). + + Returns: + np.array with shape (N_representations, N_features), or a tuple containing a list of atomic labels and said np.array. + + Raises: + RuntimeError: In case of shape mismatch. """ if file_format is None: # Do not use mutable data structures for argument defaults file_format = {'is_labeled':None, 'is_single':None} diff --git a/qstack/tools.py b/qstack/tools.py index 2fcc06b9..0a1c76bb 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -1,6 +1,7 @@ """Utility functions and classes for Q-stack. -Provides decorators, argument parsers, and helper functions for command-line tools.""" +Provides decorators, argument parsers, and helper functions for command-line tools. +""" import os import time @@ -11,7 +12,7 @@ def unix_time_decorator(func): - """Decorator to measure and print execution time statistics for a function. + """Measure and print execution time statistics for a function. Measures real, user, and system time for the decorated function. Thanks to https://gist.github.com/turicas/5278558 @@ -35,7 +36,7 @@ def wrapper(*args, **kwargs): def unix_time_decorator_with_tvalues(func): - """Decorator to measure execution time statistics and return them along with function result. + """Measure execution time statistics and return them along with function result. Measures real, user, and system time for the decorated function and returns timing dict. Thanks to https://gist.github.com/turicas/5278558 @@ -76,7 +77,7 @@ class FlexParser(argparse.ArgumentParser): """ def remove_argument(self, arg): - """Removes an argument from the parser.
+ """Remove an argument from the parser. Utility method for customizing parsers by removing unwanted arguments from the pre-configured set. Useful when deriving specialized parsers. @@ -84,8 +85,8 @@ def remove_argument(self, arg): Args: arg (str): Option destination name. - Returns: - None: Modifies parser in place. + Output: + Modifies parser in place. """ for action in self._actions: opts = action.option_strings @@ -102,7 +103,7 @@ def remove_argument(self, arg): def slice_generator(iterable, inc=lambda x: x, i0=0): - """Generates slices for elements in an iterable based on increments. + """Generate slices for elements in an iterable based on increments. Args: iterable (iterable): Iterable of elements to generate slices for. @@ -150,8 +151,10 @@ def __init__(self, action='slicer', inc=lambda x: x, i0=0): def add(self, di): """Advances the cursor and returns the current range or slice. + Args: di: Element to determine increment size. + Returns: Current range or slice after advancing. """ diff --git a/ruff.toml b/ruff.toml index 14f13f8c..4b90ae4c 100644 --- a/ruff.toml +++ b/ruff.toml @@ -41,11 +41,15 @@ docstring-code-line-length = "dynamic" [lint] + + + select = [ "A", "E", "F", "B", "S", "COM", "C4", "EXE", "ICN", "PIE", "PLR1714", "ARG", "PERF", "FURB", "PLE", "TRY002", "W", "UP", "RUF", "SIM", "NPY", - #"FIX", "TD", "D", # later + "D", + #"FIX", "TD", # later ] ignore = [ "E741", # ambiguous variable name @@ -57,8 +61,53 @@ ignore = [ "SIM3", # yoda condition "SIM114", # combine if branches "SIM108", # use ternary operator +"D105", # missing docstring in magic method +"D107", # missing docstring in __init__ +"D203", # blank line required before class docstring +"D204", # blank line required after class docstring +"D213", # multi-line docstring summary should start at the second line +"D413", # missing blank line after last section ] +preview = true +extend-select = ["DOC"] +extend-ignore = [ + "E265", # 1 + "FURB103", # 1 + "FURB152", # 1 + "E266", # 2 + 
"E271", # 2 + "E305", # 2 + "FURB101", # 2 + "E262", # 3 + "E306", # 5 + "E228", # 6 + "E272", # 6 + "W391", # 6 + "RUF031", # 7 + "E261", # 12 + "E275", # 13 + "E251", # 18 + "E211", # 19 + "E303", # 24 + "E201", # 33 + "E202", # 34 + "E203", # 51 + "E221", # 113 + "E225", # 179 + "E241", # 268 + "E222", # 278 + "E231", # 608 + "E226", # 4313 + "FURB118", + "E117", + "E111", + "C419", +] + +#[lint.pydocstyle] +#convention = "google" + [lint.per-file-ignores] "qstack/spahm/rho/Dmatrix.py" = ["E702"] # multiple statements on one line (semicolon) "qstack/spahm/rho/bond.py" = ["E711"] # comparison to `None` for np.array elements @@ -67,4 +116,14 @@ ignore = [ "tests/*" = [ "S101", # use of assert "S306", # use of mktemp +"D", "DOC", +] +"qstack/mathutils/xyz_integrals_sym.py" = ["D417"] # missing argument descriptions +"qstack/mathutils/xyz_integrals_float.py" = ["D417"] # missing argument descriptions +"qstack/reorder.py" = ["DOC502"] # raised exception is not explicitly raised +"qstack/orcaio.py" = ["DOC502"] # raised exception is not explicitly raised +"qstack/spahm/rho/dmb_rep_atom.py" = [ +"DOC201", # `return` is not documented in docstring +"DOC102", # documented parameter is not in the function's signature +"D214", # section is over-indented ] From bd29fdbe0407d1ea589c7fc17cc332c55fcf7c82 Mon Sep 17 00:00:00 2001 From: Ksenia Date: Mon, 10 Nov 2025 19:34:25 +0100 Subject: [PATCH 22/23] Format according to preview ruff rules --- qstack/basis_opt/opt.py | 5 -- qstack/constants.py | 7 +- qstack/equio.py | 15 ++--- qstack/fields/dm.py | 1 - qstack/fields/dori.py | 2 +- qstack/fields/excited.py | 2 +- qstack/mathutils/wigner.py | 4 +- qstack/mathutils/xyz_integrals_float.py | 1 - qstack/orcaio.py | 2 - qstack/qml/b2r2.py | 2 +- qstack/regression/condition.py | 2 +- qstack/regression/cross_validate_results.py | 10 +-- qstack/regression/final_error.py | 2 +- qstack/regression/hyperparameters.py | 10 +-- qstack/regression/kernel.py | 2 +- 
qstack/regression/local_kernels.py | 3 +- qstack/regression/oos.py | 2 +- qstack/regression/regression.py | 6 +- qstack/reorder.py | 3 +- qstack/spahm/LB2020guess.py | 21 ++---- qstack/spahm/compute_spahm.py | 3 +- qstack/spahm/guesses.py | 5 +- qstack/spahm/rho/Dmatrix.py | 5 +- qstack/spahm/rho/bond.py | 1 + qstack/spahm/rho/compute_rho_spahm.py | 5 +- qstack/spahm/rho/dmb_rep_atom.py | 1 - qstack/spahm/rho/dmb_rep_bond.py | 2 +- qstack/spahm/rho/sym.py | 4 +- qstack/spahm/rho/utils.py | 8 +-- qstack/tools.py | 2 +- ruff.toml | 48 +++---------- tests/test_c2mio.py | 7 +- tests/test_dori.py | 1 + tests/test_equio.py | 19 +++--- tests/test_excited.py | 16 ++--- tests/test_fitting.py | 8 +-- tests/test_global.py | 25 ++++--- tests/test_kernels.py | 20 +++--- tests/test_molden.py | 2 +- tests/test_moments.py | 22 +++--- tests/test_opt.py | 7 +- tests/test_orca.py | 7 +- tests/test_regression.py | 10 +-- tests/test_reorder.py | 16 ++--- tests/test_rxn-repr.py | 9 ++- tests/test_slatm.py | 4 +- tests/test_spahm.py | 14 ++-- tests/test_spahm_a.py | 52 ++++++++------ tests/test_spahm_b.py | 30 +++++---- tests/test_spahm_b_selected.py | 3 +- tests/test_spahm_grad.py | 12 ++-- tests/test_splitting.py | 13 ++-- tests/test_utils.py | 75 ++++++++++++--------- 53 files changed, 276 insertions(+), 282 deletions(-) diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index f0140743..75f0d443 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -43,7 +43,6 @@ def energy(x): E += qbbt.energy_mol(newbasis, m) return E - def gradient(x): """Compute total loss function (fitting error) and gradient for given exponents. @@ -76,7 +75,6 @@ def gradient(x): dE_dx = dE_da * exponents return E, dE_dx - def gradient_only(x): """Compute only the gradient of the loss function (wrapper for optimization algorithms). @@ -88,7 +86,6 @@ def gradient_only(x): """ return gradient(x)[1] - def read_bases(basis_files): """Read basis set definitions from files or dicts. 
@@ -117,7 +114,6 @@ def read_bases(basis_files): basis.update(i) return basis - def make_bf_start(): """Create basis function index bounds for each element. @@ -131,7 +127,6 @@ def make_bf_start(): bf_bounds[q] = [start, start+nbf[i]] return bf_bounds - def make_moldata(fname): """Create molecular data dictionary from file or dict. diff --git a/qstack/constants.py b/qstack/constants.py index 5f3affc2..c9085d51 100644 --- a/qstack/constants.py +++ b/qstack/constants.py @@ -3,10 +3,13 @@ https://physics.nist.gov/cuu/Constants/ https://physics.nist.gov/cuu/Constants/Table/allascii.txt """ +import math + + # Constants SPEED_LIGHT = 299792458.0 PLANCK = 6.62607004e-34 -HBAR = PLANCK/(2*3.141592653589793) +HBAR = PLANCK/(2*math.pi) FUND_CHARGE = 1.6021766208e-19 MOL_NA = 6.022140857e23 MASS_E = 9.10938356e-31 @@ -20,4 +23,4 @@ BOHR2ANGS = 0.52917721092 # Angstroms HARTREE2J = HBAR**2/(MASS_E*(BOHR2ANGS*1e-10)**2) HARTREE2EV = 27.21138602 -AU2DEBYE = FUND_CHARGE * BOHR2ANGS*1e-10 / DEBYE # 2.541746 +AU2DEBYE = FUND_CHARGE * BOHR2ANGS*1e-10 / DEBYE # 2.541746 diff --git a/qstack/equio.py b/qstack/equio.py index aa025aee..35d2adf2 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -28,7 +28,6 @@ _molid_name = 'mol_id' - def _get_llist(mol): """Get list of angular momentum quantum numbers for basis functions of each element of a molecule. @@ -50,7 +49,7 @@ def _get_tsize(tensor): Returns: int: Total size of the tensor (total number of elements). 
""" - return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys]) + return sum(np.prod(tensor.block(key).values.shape) for key in tensor.keys) def _labels_to_array(labels): @@ -120,11 +119,11 @@ def vector_to_tensormap(mol, c): if llists[q]==sorted(llists[q]): for l in set(llists[q]): msize = 2*l+1 - nsize = blocks[(l,q)].shape[-1] + nsize = blocks[l,q].shape[-1] cslice = c[i(nsize*msize)].reshape(nsize,msize).T if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) cslice = cslice[pyscf2gpr_l1_order] - blocks[(l,q)][iq[q],:,:] = cslice + blocks[l,q][iq[q],:,:] = cslice else: il = dict.fromkeys(range(max(llists[q]) + 1), 0) for l in llists[q]: @@ -132,7 +131,7 @@ def vector_to_tensormap(mol, c): cslice = c[i(msize)] if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) cslice = cslice[pyscf2gpr_l1_order] - blocks[(l,q)][iq[q],:,il[l]] = cslice + blocks[l,q][iq[q],:,il[l]] = cslice il[l] += 1 iq[q] += 1 @@ -256,7 +255,7 @@ def matrix_to_tensormap(mol, dm): dmslice = np.transpose(dmslice, axes=[1,3,0,2]).reshape(msize1,msize2,-1) block = tensor_blocks[tm_label_vals.index((l1,l2,q1,q2))] at_p = block.samples.position((iat1,iat2)) - blocks[(l1,l2,q1,q2)][at_p,:,:,:] = dmslice + blocks[l1,l2,q1,q2][at_p,:,:,:] = dmslice iq2[q2] += 1 iq1[q1] += 1 else: @@ -275,7 +274,7 @@ def matrix_to_tensormap(mol, dm): block = tensor_blocks[tm_label_vals.index((l1, l2, q1, q2))] at_p = block.samples.position((iat1, iat2)) n_p = block.properties.position((il1[l1], il2[l2])) - blocks[(l1,l2,q1,q2)][at_p,:,:,n_p] = dmslice + blocks[l1,l2,q1,q2][at_p,:,:,n_p] = dmslice il2[l2] += 1 iq2[q2] += 1 il1[l1] += 1 @@ -486,7 +485,7 @@ def split(tensor): continue sampleidx = [t[0] for t in samples] samplelbl = [t[1] for t in samples] - #sampleidx = [block.samples.position(lbl) for lbl in samplelbl] + # sampleidx = [block.samples.position(lbl) for lbl in samplelbl] blocks[key] = block.values[sampleidx] block_samp_labels[key] = metatensor.Labels(tensor.sample_names[1:], 
np.array(samplelbl)[:,1:]) diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index 2b77c863..90651f13 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -103,4 +103,3 @@ def sphericalize_density_matrix(mol, dm): spherical_dm[idx+m,jdx+m] = trace / msize return spherical_dm - diff --git a/qstack/fields/dori.py b/qstack/fields/dori.py index 76abddfa..e911802f 100644 --- a/qstack/fields/dori.py +++ b/qstack/fields/dori.py @@ -44,7 +44,7 @@ def eval_rho_dm(mol, ao, dm, deriv=2): if deriv==2: DM_dAO_dr_i = 2 * _dot_ao_dm(mol, dAO_dr[i], dm, None, None, None) for j in range(i, 3): - d2rho_dr2[i,j] = _contract_rho(dAO_dr[j], DM_dAO_dr_i) + 2.0*np.einsum('ip,ip->i', d2AO_dr2[triu_idx[(i,j)]], DM_AO) + d2rho_dr2[i,j] = _contract_rho(dAO_dr[j], DM_dAO_dr_i) + 2.0*np.einsum('ip,ip->i', d2AO_dr2[triu_idx[i,j]], DM_AO) d2rho_dr2[j,i] = d2rho_dr2[i,j] if deriv==1: diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index c52bb771..868a323e 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -120,7 +120,7 @@ def exciton_properties_dm(mol, hole, part): dist = np.linalg.norm(hole_r-part_r) hole_extent = np.sqrt(hole_r2-hole_r@hole_r) part_extent = np.sqrt(part_r2-part_r@part_r) - return(dist, hole_extent, part_extent) + return dist, hole_extent, part_extent def exciton_properties(mol, hole, part): diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index dc5037a8..7b822945 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -56,7 +56,7 @@ def get_polynom_Y(l, m): r = Symbol('r', nonnegative=True) expr = real_Y_correct_phase(l,m, theta, phi) * r**l expr = expand(expr, func=True) - expr = expr.rewrite(sp.cos)#.simplify().trigsimp() + expr = expr.rewrite(sp.cos) # .simplify().trigsimp() expr = expand_trig(expr) expr = cancel(expr) expr = expr.subs({r: sp.sqrt(x*x+y*y+z*z), phi: sp.atan2(y,x), theta: sp.atan2(sp.sqrt(x*x+y*y),z)}) @@ -139,7 +139,6 @@ def compute_wigner(lmax): # rotated spherical 
harmonic Y_rot[l][m] = Y[l][m].subs({x: x1, y:y1, z:z1}).subs({x1:xx*x+xy*y+xz*z, y1:yx*x+yy*y+yz*z, z1:zx*x+zy*y+zz*z}) - D = [zeros(2*l+1,2*l+1) for l in range(lmax+1)] integrals_xyz_dict = {} for l in range(lmax+1): @@ -159,4 +158,3 @@ def compute_wigner(lmax): D = compute_wigner(lmax) print_wigner(D) - diff --git a/qstack/mathutils/xyz_integrals_float.py b/qstack/mathutils/xyz_integrals_float.py index 26374f57..f0b4b2f4 100755 --- a/qstack/mathutils/xyz_integrals_float.py +++ b/qstack/mathutils/xyz_integrals_float.py @@ -70,4 +70,3 @@ def trinomial(k1, k2, k3): if __name__ == "__main__": k,n,m = map(int, sys.argv[1:4]) print(f"{xyz(k,n,m):.15f} π") - diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 184ce968..a39ade1c 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -91,7 +91,6 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re else: dm = np.fromfile(path[0], offset=8, count=mol.nao*mol.nao*nspin).reshape((nspin,mol.nao,mol.nao)) - is_def2 = 'def2' in pyscf.gto.basis._format_basis_name(mol.basis) has_3d = np.any([21 <= pyscf.data.elements.charge(q) <= 30 for q in mol.elements]) if is_def2 and has_3d: @@ -277,4 +276,3 @@ def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): if reorder_dest is not None: reorder_coeff_inplace(c, mol, reorder_dest, ls if (ls and sort_l) else None) return c, e, occ - diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 7e498cb1..6a27b14e 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -177,7 +177,7 @@ def get_b2r2_a_molecular(ncharges, coords, elements, coords_b = coords[j] R = np.linalg.norm(coords_b - coords_a) if R < rcut: - twobodyrep[bag_idx[(ncharge_a, ncharge_b)]] += get_gaussian(grid, R) + twobodyrep[bag_idx[ncharge_a, ncharge_b]] += get_gaussian(grid, R) twobodyrep = 2.0*np.concatenate(twobodyrep) return twobodyrep diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index b1cedb03..e029d659 100644 --- 
a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -62,7 +62,7 @@ def main(): parser.remove_argument('train_size') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) c = condition(X, read_kernel=args.readk, sigma=args.sigma, eta=args.eta, diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index 034b2dcf..50dc3bd1 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -79,8 +79,8 @@ def cv_results(X, y, np.save(f"{preffix}_{n_rep}-lc-runs.npy", lc_runs) if save_pred: np_pred = np.array(predictions_n) - ##### Can not take means !!! Test-set varies with run ! - ##### pred_mean = np.concatenate([np_pred.mean(axis=0),np_pred.std(axis=0)[1].reshape((1,-1))], axis=0) + # Can not take means !!! Test-set varies with run ! + # pred_mean = np.concatenate([np_pred.mean(axis=0),np_pred.std(axis=0)[1].reshape((1,-1))], axis=0) pred_mean = np.concatenate([*np_pred.reshape((n_rep, 2, -1))], axis=0) np.savetxt(f"{preffix}_{n_rep}-predictions.txt", pred_mean.T) return lc @@ -95,14 +95,14 @@ def main(): parser.add_argument('--save-pred', action='store_true', dest='save_pred', default=False, help='if save test-set prediction') args = parser.parse_args() - if(args.readk): + if args.readk: args.sigma = [np.nan] - if(args.ll): + if args.ll: correct_num_threads() + print(vars(args)) X = np.load(args.repr) y = np.loadtxt(args.prop) - print(vars(args)) final = cv_results(X, y, sigmaarr=args.sigma, etaarr=args.eta, gdict=args.gdict, gkernel=args.gkernel, akernel=args.akernel, read_kernel=args.read_kernel, diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 67e35d9a..0cf2ed23 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -82,7 +82,7 @@ def main(): parser.add_argument('--save-alpha', type=str, dest='save_alpha', 
default=None, help='file to write the regression coefficients to') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 2e682c9e..9d86af91 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -125,9 +125,9 @@ def hyper_loop(sigma, eta): # at the 1st iteration if is checked twice on purpose if direction=='up' and best_sigma==max(work_sigma): - new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) + new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) elif direction=='down' and best_sigma==min(work_sigma): - new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) if new_sigma is None: break @@ -142,11 +142,11 @@ def main(): parser.remove_argument("random_state") parser.remove_argument("train_size") args = parser.parse_args() - if(args.readk): + if args.readk: args.sigma = [np.nan] - print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() + print(vars(args)) X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index af91346b..9f2685e5 100644 --- a/qstack/regression/kernel.py +++ b/qstack/regression/kernel.py @@ -41,7 +41,7 @@ def main(): parser.add_argument('--dir', type=str, dest='dir', default='./', help='directory to save the output in') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() if os.path.isfile(args.repr): X = np.load(args.repr) diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index bb076b2f..69aa7130 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -31,6 +31,7 @@ def custom_laplacian_kernel(X, Y, gamma): """ if X.shape[1:] != Y.shape[1:]: raise 
RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") + def cdist(X, Y): K = np.zeros((len(X),len(Y))) for i,x in enumerate(X): @@ -147,7 +148,7 @@ def local_laplacian_kernel_wrapper(X, Y, gamma): X, Y = np.asarray(X), np.asarray(Y) if X.shape[1:] != Y.shape[1:]: raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") - if X.ndim==1: # do not extend so the behavior is the same for 'L' and 'L_custom_py' + if X.ndim==1: # do not extend so the behavior is the same for 'L' and 'L_custom_py' raise RuntimeError("Dimensionality of X should be > 1") if X.ndim>2: diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 3f64ed00..eca53cbf 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -54,7 +54,7 @@ def main(): parser.add_argument('--alpha', type=str, dest='alpha', required=True, help='path to the regression weights file') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) X_oos = np.load(args.x_oos) diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index b38e71b6..572c3cd1 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -58,7 +58,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, else: if read_kernel: raise RuntimeError('Cannot do FPS with kernels') - sparse_idx = do_fps(X_train)[0][:sparse] # indices within the training set + sparse_idx = do_fps(X_train)[0][:sparse] # indices within the training set if debug: # Ensures reproducibility of the sample selection for each train_size over repetitions (n_rep) @@ -71,7 +71,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, size_train = int(np.floor(len(y_train)*size)) if size <= 1.0 else size maes = [] for _rep in range(n_rep): - train_idx = rng.choice(all_indices_train, size = size_train, replace=False) + train_idx = rng.choice(all_indices_train, size=size_train, 
replace=False) y_kf_train = y_train[train_idx] if not sparse: @@ -97,7 +97,7 @@ def main(): parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='enable debug') args = parser.parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/reorder.py b/qstack/reorder.py index 8c1e3fe8..5c50ba75 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -51,7 +51,7 @@ def _orca2gpr_idx(l_slices, m): idx[s] = np.concatenate((idx[s][::-2], idx[s][1::2])) signs = np.ones_like(idx) signs[np.where(np.abs(m)>=3)] = -1 # in pyscf order - signs[idx] = signs # in orca order + signs[idx] = signs # in orca order return idx, signs @@ -131,4 +131,3 @@ def get_idx(L, m, convention): newvector *= sign_dest return newvector - diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index 78f5218b..3227d562 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -22,7 +22,6 @@ def __init__(self, fname=None, parameters='HF'): self.init_data() self.get_basis(fname, parameters) - def renormalize(self, a): r"""Compute renormalization factor for Gaussian basis functions. @@ -42,7 +41,6 @@ def renormalize(self, a): x = np.sqrt(np.sqrt(0.5*a/np.pi)) return x*x*x - def read_ac(self, fname): """Read auxiliary basis parameters from file. @@ -70,7 +68,6 @@ def read_ac(self, fname): basis[data.elements.ELEMENTS[q]] = qbasis return basis - def add_caps(self, basis): """Add cap (diffuse) functions to the auxiliary basis. @@ -87,7 +84,6 @@ def add_caps(self, basis): basis[qname].append( [0, [a, self.renormalize(a) ]] ) return - def get_basis(self, fname, parameters): """Initialize auxiliary basis set from file or predefined parameters. @@ -112,7 +108,6 @@ def get_basis(self, fname, parameters): self.acbasis = self._hfs_basis self.parameters = 'HFS' - def use_charge(self, mol): """Adjust basis coefficients based on molecular charge. 
@@ -132,7 +127,6 @@ def use_charge(self, mol): acbasis[q][-1][1][1] *= factor return acbasis - def use_ecp(self, mol, acbasis): """Adjust basis set to account for effective core potentials (ECP). @@ -176,7 +170,6 @@ def use_ecp(self, mol, acbasis): acbasis[q].pop(i) return acbasis - def get_auxweights(self, auxmol): """Extract auxiliary basis weights from the basis. @@ -198,7 +191,6 @@ def get_auxweights(self, auxmol): iao+=1 return w - def merge_caps(self, w, eri3c): """Contracts 3-center integrals with auxiliary basis weights. @@ -214,7 +206,6 @@ def merge_caps(self, w, eri3c): """ return np.einsum('...i,i->...', eri3c, w) - def get_eri3c(self, mol, auxmol): """Compute 3-center electron repulsion integrals. @@ -231,7 +222,6 @@ def get_eri3c(self, mol, auxmol): eri3c = pmol.intor('int3c2e_sph', shls_slice=shls_slice) return eri3c - def check_coefficients(self, mol, acbasis): """Validate that auxiliary basis coefficients sum to correct total charge. @@ -250,7 +240,6 @@ def check_coefficients(self, mol, acbasis): if not np.isclose(ch1, ch2): raise RuntimeError("Coefficients corrupted after adding ECP") - def HLB20(self, mol): """Compute the LB2020 effective potential matrix. @@ -268,7 +257,6 @@ def HLB20(self, mol): auxw = self.get_auxweights(auxmol) return self.merge_caps(auxw, eri3c) - def Heff(self, mol): """Construct one-electron Hamiltonian for initial guess. @@ -285,7 +273,6 @@ def Heff(self, mol): self.H = self.Hcore + self.HLB20(mol) return self.H - def HLB20_ints_generator(self, mol, auxmol): """Create generator for LB2020 potential gradients. 
@@ -302,10 +289,11 @@ def HLB20_ints_generator(self, mol, auxmol): """ pmol = mol + auxmol shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) - eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) - eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) + eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) + eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) aoslices = mol.aoslice_by_atom()[:,2:] auxaoslices = auxmol.aoslice_by_atom()[:,2:] + def HLB20_ints_deriv(iat): p0, p1 = aoslices[iat] P0, P1 = auxaoslices[iat] @@ -316,7 +304,6 @@ def HLB20_ints_deriv(iat): return -eri3c2e_ip return HLB20_ints_deriv - def HLB20_generator(self, mol): """Create generator for LB2020 potential gradient contributions. @@ -333,11 +320,11 @@ def HLB20_generator(self, mol): auxmol = df.make_auxmol(mol, acbasis) eri3c = self.HLB20_ints_generator(mol, auxmol) auxw = self.get_auxweights(auxmol) + def HLB20_deriv(iat): return self.merge_caps(auxw, eri3c(iat)) return HLB20_deriv - def init_data(self): """Set parameters. 
diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 998d3fe4..900cf8b4 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -64,10 +64,11 @@ def ext_field_generator(mol, field): """ shls_slice = (0, mol.nbas, 0, mol.nbas) with mol.with_common_orig((0,0,0)): - int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) + int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) aoslices = mol.aoslice_by_atom()[:,2:] if field is None: field = (0,0,0) + def field_deriv(iat): p0, p1 = aoslices[iat] dmu_dr = np.zeros_like(int1e_irp) # dim(mu)×dim(r)×nao×nao diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index cfbee972..3314d9a7 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -77,8 +77,8 @@ def SAD(mol, xc): else: fock = hc + vhf[0] if not np.array_equal(vhf[0], vhf[1]): - msg = f'The effective potential ({xc}) returned different alpha and beta matrix components from atomicHF DM' - warnings.warn(msg, RuntimeWarning, stacklevel=2) + msg = f'The effective potential ({xc}) returned different alpha and beta matrix components from atomicHF DM' + warnings.warn(msg, RuntimeWarning, stacklevel=2) return fock @@ -264,6 +264,7 @@ def LB_grad(mf): """ hcore_grad = mf.hcore_generator(mf.mol) HLB_grad = LB20().HLB20_generator(mf.mol) + def H_grad(iat): return hcore_grad(iat) + HLB_grad(iat) return H_grad diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index e9c7522a..755c7585 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -61,7 +61,7 @@ def new_xy_axis(z): i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z x = -z[i] * z x[i] += 1.0 # create a vector orthogonal to z with dominant component i - x /= np.sqrt(1.0-z[i]*z[i]) # normalize + x /= np.sqrt(1.0-z[i]*z[i]) # normalize y = np.cross(z,x) return 
np.array([x,y,z]) @@ -113,7 +113,7 @@ def Dmatrix(xyz, lmax, order='xyz'): D[1][l+ 1,l+ -1] = xy D[1][l+ 1,l+ 0] = xz D[1][l+ 1,l+ 1] = xx - elif order=='xyz': # 1 -1 0 + elif order=='xyz': # 1 -1 0 D[1][ 0, 0] = xx D[1][ 0, 1] = xy D[1][ 0, 2] = xz @@ -315,4 +315,3 @@ def Dmatrix_for_z(z, lmax, order='xyz'): list: List of Wigner D-matrices for l=0 to lmax. """ return Dmatrix(new_xy_axis(z), lmax, order) - diff --git a/qstack/spahm/rho/bond.py b/qstack/spahm/rho/bond.py index c745458c..e332a495 100644 --- a/qstack/spahm/rho/bond.py +++ b/qstack/spahm/rho/bond.py @@ -74,5 +74,6 @@ def main(args=None): else: np.save(args.name_out + mod_suffix, modvec) + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index 627baa45..234d632a 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -60,7 +60,7 @@ def spahm_a_b(rep_type, mols, dms, elements=elements, cutoff=cutoff, pairfile=pairfile, dump_and_exit=dump_and_exit, same_basis=same_basis) qqs = qqs0 if zeros else qqs4q - maxlen = max([dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements]) + maxlen = max(dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements) elif rep_type == 'atom': if elements is None: elements = set() @@ -146,7 +146,7 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= else: all_atoms = [mol.elements for mol in mols] - spin = np.array(spin) ## a bit dirty but couldn't find a better way to ensure Iterable type! + spin = np.array(spin) # a bit dirty but couldn't find a better way to ensure Iterable type! 
if (spin == None).all(): omods = [None] @@ -279,5 +279,6 @@ def main(args=None): else: np.save(args.name_out + mod_suffix, modvec) + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index 7ff7360d..b6432627 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -92,7 +92,6 @@ def df_occup(mol, dm, auxbasis, only_i=None): c = fields.decomposition.correct_N_atomic(auxmol, Q, c0, metric=eri2c) return sym.c_split_atom(auxmol, c, only_i=only_i) - def maxlen_long(idx, _): return sum(len(v) for v in idx.values()) diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index a47e66e6..4f26b635 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -327,7 +327,7 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): dm1 = L.get_bond(i0, i1) bname = make_bname(q0, q1) cs = fit_dm(dm1, L.mol, mybasis[bname], r0, r1) - lmax = max([c[0] for c in cs]) + lmax = max(c[0] for c in cs) v0 = vec_from_cs(+z, cs, lmax, idx[bname]) v1 = vec_from_cs(-z, cs, lmax, idx[bname]) return [v0, v1], bname diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index 14867533..a1aad895 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -142,7 +142,7 @@ def metric_matrix(q, idx, ao, S): i1, j1 = idx[p1] l = ao['l'][i] l1 = ao['l'][i1] - if(l!=l1): + if l!=l1: continue A[p1,p] = A[p,p1] = 1.0/(2*l+1) \ * S[idxl0(i, l, ao[q]), idxl0(i1, l, ao[q])] \ @@ -169,7 +169,7 @@ def metric_matrix_short(idx, ao, S): i1,j1 = idx[p1] l = ao['l'][i] l1 = ao['l'][i1] - if(l!=l1): + if l!=l1: continue A[p1,p] = A[p,p1] = S[i,i1] * S[j,j1] / (2*l+1) return sqrtm(A) diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index b47a4d64..b0e7c9bb 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -53,7 +53,7 @@ def chsp_converter(chsp): return np.full(n, None, dtype=object) if os.path.isfile(fname): 
chsp = np.loadtxt(fname, dtype=object, converters=chsp_converter, encoding=None) - if(len(chsp)!=n): + if len(chsp)!=n: raise RuntimeError(f'Wrong length of the file {fname}') else: raise RuntimeError(f"{fname} can not be found") @@ -194,7 +194,7 @@ def check_data_struct(fin, local=False): is_labeled = False if not local and x.ndim == 1: is_single = True - elif x.shape[1] != 2: ## could be problematic! (if it's a set of local representations and nfeatures = 2 !! + elif x.shape[1] != 2: # could be problematic! (if it's a set of local representations and nfeatures = 2 !! is_single=True else: is_single = False @@ -266,10 +266,10 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, else: reps.extend(x) else: - if is_labeled: + if is_labeled: reps.append(x[1]) labels.extend(x[0]) - else: + else: reps.append(x) try: reps = np.array(reps, dtype=float) diff --git a/qstack/tools.py b/qstack/tools.py index 0a1c76bb..11e30bd9 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -150,7 +150,7 @@ def __init__(self, action='slicer', inc=lambda x: x, i0=0): self.actions_dict = {'slicer': self._slicer, 'ranger': self._ranger} def add(self, di): - """Advances the cursor and returns the current range or slice. + """Advance the cursor and return the current range or slice. Args: di: Element to determine increment size. 
diff --git a/ruff.toml b/ruff.toml index 4b90ae4c..953699ed 100644 --- a/ruff.toml +++ b/ruff.toml @@ -42,8 +42,6 @@ docstring-code-line-length = "dynamic" [lint] - - select = [ "A", "E", "F", "B", "S", "COM", "C4", "EXE", "ICN", "PIE", "PLR1714", "ARG", @@ -69,45 +67,20 @@ ignore = [ "D413", # missing blank line after last section ] -preview = true +#preview = true + extend-select = ["DOC"] extend-ignore = [ - "E265", # 1 - "FURB103", # 1 - "FURB152", # 1 - "E266", # 2 - "E271", # 2 - "E305", # 2 - "FURB101", # 2 - "E262", # 3 - "E306", # 5 - "E228", # 6 - "E272", # 6 - "W391", # 6 - "RUF031", # 7 - "E261", # 12 - "E275", # 13 - "E251", # 18 - "E211", # 19 - "E303", # 24 - "E201", # 33 - "E202", # 34 - "E203", # 51 - "E221", # 113 - "E225", # 179 - "E241", # 268 - "E222", # 278 - "E231", # 608 - "E226", # 4313 - "FURB118", - "E117", - "E111", - "C419", +# pathlib + "FURB101", "FURB103", +# whitespaces + "E201", "E202", "E203", "E211", + "E221", "E222", "E225", "E226", "E228", + "E231", "E241", "E271", "E272", +# lambda + "FURB118", ] -#[lint.pydocstyle] -#convention = "google" - [lint.per-file-ignores] "qstack/spahm/rho/Dmatrix.py" = ["E702"] # multiple statements on one line (semicolon) "qstack/spahm/rho/bond.py" = ["E711"] # comparison to `None` for np.array elements @@ -122,6 +95,7 @@ extend-ignore = [ "qstack/mathutils/xyz_integrals_float.py" = ["D417"] # missing argument descriptions "qstack/reorder.py" = ["DOC502"] # raised exception is not explicitly raised "qstack/orcaio.py" = ["DOC502"] # raised exception is not explicitly raised +"qstack/equio.py" = ["E251"] # unexpected spaces around keyword / parameter equals "qstack/spahm/rho/dmb_rep_atom.py" = [ "DOC201", # `return` is not documented in docstring "DOC102", # documented parameter is not in the function's signature diff --git a/tests/test_c2mio.py b/tests/test_c2mio.py index f0100b21..93d41277 100755 --- a/tests/test_c2mio.py +++ b/tests/test_c2mio.py @@ -3,15 +3,16 @@ import os from qstack.c2mio 
import get_cell, get_mol, get_ligand + def test_c2mio(): path = os.path.dirname(os.path.realpath(__file__)) - cell = get_cell(f'{path}/data/cell2mol/YOXKUS.cif', workdir=f'{path}/data/cell2mol/') #cell = get_cell('Cell_yoxkus.cell', workdir='.') - #print(cell.moleclist) + cell = get_cell(f'{path}/data/cell2mol/YOXKUS.cif', workdir=f'{path}/data/cell2mol/') # cell = get_cell('Cell_yoxkus.cell', workdir='.') + # print(cell.moleclist) mol = get_mol(cell, mol_idx=0, ecp='def2-svp') assert mol.natm==52 cell = get_cell(f'{path}/data/cell2mol/Cell_YOXKUS.cell', workdir='.') - #print(cell.moleclist[0].ligands) + # print(cell.moleclist[0].ligands) mol_lig = get_ligand(cell, mol_idx=0, lig_idx=1) assert mol_lig.natm==47 diff --git a/tests/test_dori.py b/tests/test_dori.py index 7350721b..4769cb46 100755 --- a/tests/test_dori.py +++ b/tests/test_dori.py @@ -85,6 +85,7 @@ def test_dori_df(): dori2, _, _, _, _ = dori(mol, c=c, grid_type='cube', resolution=0.5, alg='num') assert np.allclose(dori0, dori2) + if __name__ == '__main__': test_derivatives() test_dori_deriv() diff --git a/tests/test_equio.py b/tests/test_equio.py index 0738634a..2845eba1 100755 --- a/tests/test_equio.py +++ b/tests/test_equio.py @@ -3,6 +3,7 @@ import os import tempfile import filecmp +from itertools import starmap import numpy as np from qstack import compound, equio import metatensor @@ -27,9 +28,10 @@ def test_equio_vector(): ctensor = equio.array_to_tensormap(mol, c) tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, ctensor) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts', tmpfile)) c1 = equio.tensormap_to_array(mol, ctensor) - assert(np.linalg.norm(c-c1)==0) + assert (np.linalg.norm(c-c1)==0) + def test_equio_matrix(): path = os.path.dirname(os.path.realpath(__file__)) @@ -38,9 +40,10 @@ def test_equio_matrix(): dtensor = equio.array_to_tensormap(mol, dm) tmpfile = tempfile.mktemp() + 
MTS_EXT metatensor.save(tmpfile, dtensor) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.mts', tmpfile)) dm1 = equio.tensormap_to_array(mol, dtensor) - assert(np.linalg.norm(dm-dm1)==0) + assert (np.linalg.norm(dm-dm1)==0) + def test_equio_joinsplit(): path = os.path.dirname(os.path.realpath(__file__)) @@ -54,12 +57,12 @@ def test_equio_joinsplit(): tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, ctensor_big) - assert(filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts', tmpfile)) ctensors = equio.split(ctensor_big) - c11, c22 = [equio.tensormap_to_array(mol, t) for mol,t in zip([mol1,mol2], ctensors, strict=True)] - assert(np.linalg.norm(c11-c1)==0) - assert(np.linalg.norm(c22-c2)==0) + c11, c22 = [*starmap(equio.tensormap_to_array, zip([mol1, mol2], ctensors, strict=True))] + assert (np.linalg.norm(c11-c1)==0) + assert (np.linalg.norm(c22-c2)==0) if __name__ == '__main__': diff --git a/tests/test_excited.py b/tests/test_excited.py index d15988a9..908a33b0 100755 --- a/tests/test_excited.py +++ b/tests/test_excited.py @@ -23,22 +23,22 @@ def test_excited(): x_ao = fields.excited.get_transition_dm(mol, X[state_id], coeff) dip = fields.moments.first(mol, x_ao) dip0 = np.array([ 0.68927353, -2.10714637, -1.53423419]) - assert(np.allclose(dip, dip0, atol=1e-8)) + assert (np.allclose(dip, dip0, atol=1e-8)) hole_d, part_d = fields.excited.get_holepart(mol, X[state_id], coeff) - assert(np.allclose(hole_d, hole_d0, atol=1e-8)) - assert(np.allclose(part_d, part_d0, atol=1e-8)) + assert (np.allclose(hole_d, hole_d0, atol=1e-8)) + assert (np.allclose(part_d, part_d0, atol=1e-8)) auxmol = compound.make_auxmol(mol, 'ccpvqz jkfit') dip = fields.moments.first(auxmol, x_c) dip0 = np.array([-0.68919144, 2.10692116, 1.53399871]) - assert(np.allclose(dip, dip0, atol=1e-8)) + assert (np.allclose(dip, 
dip0, atol=1e-8)) dist, hole_extent, part_extent = fields.excited.exciton_properties(mol, hole_d, part_d) - assert(np.allclose([dist, hole_extent, part_extent], [2.59863354, 7.84850017, 5.67617426], atol=1e-7)) + assert (np.allclose([dist, hole_extent, part_extent], [2.59863354, 7.84850017, 5.67617426], atol=1e-7)) dist, hole_extent, part_extent = fields.excited.exciton_properties(auxmol, hole_c, part_c) - assert(np.allclose([dist, hole_extent, part_extent], [2.59940378, 7.8477511, 5.67541635], atol=1e-7)) + assert (np.allclose([dist, hole_extent, part_extent], [2.59940378, 7.8477511, 5.67541635], atol=1e-7)) def test_excited_frag(): @@ -56,8 +56,8 @@ def test_excited_frag(): else: omega_hole_frag0 = np.array([ 4.24698889, 25.1717958 , 7.80455406, 32.89098877, 29.88567248]) omega_part_frag0 = np.array([ 1.87258999, 19.98184387, 37.30712212, 36.77858748, 4.05985653]) - assert(np.linalg.norm(omega_hole_frag-omega_hole_frag0)<1e-8) - assert(np.linalg.norm(omega_part_frag-omega_part_frag0)<1e-8) + assert (np.linalg.norm(omega_hole_frag-omega_hole_frag0)<1e-8) + assert (np.linalg.norm(omega_part_frag-omega_part_frag0)<1e-8) if __name__ == '__main__': diff --git a/tests/test_fitting.py b/tests/test_fitting.py index f7bc4bc5..59a7f573 100755 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -12,7 +12,7 @@ def test_fitting(): dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') c0 = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') _auxmol, c = decomposition.decompose(mol, dm, 'cc-pvdz jkfit') - assert(np.linalg.norm(c-c0)<1e-10) + assert (np.linalg.norm(c-c0)<1e-10) def test_block_fitting(): @@ -29,7 +29,7 @@ def test_block_fitting(): c0 = decomposition.get_coeff(dm, eri2c0, eri3c) c = decomposition.get_coeff(dm, eri2c0, eri3c, slices=atom_bounds) - assert(np.linalg.norm(c-c0)<1e-10) + assert (np.linalg.norm(c-c0)<1e-10) def test_fitting_error(): @@ -42,9 +42,9 @@ def test_fitting_error(): _, eri2c, eri3c = decomposition.get_integrals(mol, auxmol) self_repulsion 
= decomposition.get_self_repulsion(mol, dm) error = decomposition.optimal_decomposition_error(self_repulsion, c0, eri2c) - assert(np.allclose(error, error0)) + assert (np.allclose(error, error0)) error = decomposition.decomposition_error(self_repulsion, c0, eri2c, eri3c, dm) - assert(np.allclose(error, error0)) + assert (np.allclose(error, error0)) def test_fitting_noe(): diff --git a/tests/test_global.py b/tests/test_global.py index 73d641eb..ef443021 100755 --- a/tests/test_global.py +++ b/tests/test_global.py @@ -12,13 +12,12 @@ def test_avg_kernel(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, akernel='L', gkernel='avg', sigma=1.0) - true_K = np.array( [[1. , 1. , 0.79179528], \ - [1. , 1. , 0.79179528] , \ + true_K = np.array( [[1. , 1. , 0.79179528], + [1. , 1. , 0.79179528] , [0.79179528, 0.79179528, 1. ]]) - - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) def test_rem_kernel(): @@ -28,12 +27,12 @@ def test_rem_kernel(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, akernel='L', gkernel='rem', sigma=1.0, gdict={'alpha':1.0, 'normalize':1, 'verbose':0}) - true_K = np.array( [[1. , 0.6528238, 1. ], \ - [0.6528238,1. ,0.6528238], \ + true_K = np.array( [[1. , 0.6528238, 1. ], + [0.6528238,1. ,0.6528238], [1. ,0.6528238 ,1. ]]) - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) def test_rem_kernel_not_self(): @@ -43,12 +42,12 @@ def test_rem_kernel_not_self(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, Y=np.copy(mols), akernel='L', gkernel='rem', sigma=1.0, gdict={'alpha':1.0, 'normalize':1, 'verbose':0}) - true_K = np.array( [[1. , 0.6528238, 1. ], \ - [0.6528238,1. ,0.6528238], \ + true_K = np.array( [[1. , 0.6528238, 1. ], + [0.6528238,1. ,0.6528238], [1. ,0.6528238 ,1. 
]]) - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) if __name__ == '__main__': diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 69d546f9..907f67ec 100755 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -5,19 +5,19 @@ def test_local_kernels(): - #np.random.seed(666) - #X = np.random.rand(2,4) - #Y = np.random.rand(2,4) - #K_G_good = np.zeros((len(X),len(Y))) - #K_L_good = np.zeros((len(X),len(Y))) - #for i, x in enumerate(X): + # np.random.seed(666) + # X = np.random.rand(2,4) + # Y = np.random.rand(2,4) + # K_G_good = np.zeros((len(X),len(Y))) + # K_L_good = np.zeros((len(X),len(Y))) + # for i, x in enumerate(X): # for j, y in enumerate(Y): # K_G_good[i,j] = np.dot(x-y, x-y) # K_L_good[i,j] = np.sum(abs(x-y)) - #np.exp(-K_G_good/2, out=K_G_good) - #np.exp(-K_L_good/2, out=K_L_good) - #K_dot_good = np.dot(X, Y.T) - #K_cos_good = K_dot_good / np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(Y, axis=1)) + # np.exp(-K_G_good/2, out=K_G_good) + # np.exp(-K_L_good/2, out=K_L_good) + # K_dot_good = np.dot(X, Y.T) + # K_cos_good = K_dot_good / np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(Y, axis=1)) X = np.array([[0.70043712, 0.84418664, 0.67651434, 0.72785806], [0.95145796, 0.0127032 , 0.4135877 , 0.04881279]]) Y = np.array([[0.09992856, 0.50806631, 0.20024754, 0.74415417], [0.192892 , 0.70084475, 0.29322811, 0.77447945]]) diff --git a/tests/test_molden.py b/tests/test_molden.py index 5ba58c7c..8ea95543 100755 --- a/tests/test_molden.py +++ b/tests/test_molden.py @@ -14,7 +14,7 @@ def test_molden(): c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') tmpfile = tempfile.mktemp() + '.molden' coeffs_to_molden(auxmol, c, tmpfile) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.molden', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.molden', tmpfile)) if __name__ == '__main__': diff --git 
a/tests/test_moments.py b/tests/test_moments.py index e72f3db1..c95c557d 100755 --- a/tests/test_moments.py +++ b/tests/test_moments.py @@ -18,25 +18,25 @@ def test_moments(): R2 = 12.352661975356678 r0, r1, r2 = moments.r2_c(mol, c) - assert(np.allclose(r0, R0)) - assert(np.allclose(r1, R1)) - assert(np.allclose(r2, R2)) + assert (np.allclose(r0, R0)) + assert (np.allclose(r1, R1)) + assert (np.allclose(r2, R2)) I0, I1, I2 = moments.r2_c(mol, None) - assert(np.allclose(r0, I0@c)) - assert(np.allclose(r1, I1@c)) - assert(np.allclose(r2, I2@c)) + assert (np.allclose(r0, I0@c)) + assert (np.allclose(r1, I1@c)) + assert (np.allclose(r2, I2@c)) I0, I1, I2 = moments.r2_c(mol, None, per_atom=True) r0_atom = c @ I0 - assert(np.allclose(r0_atom, R0_atom)) + assert (np.allclose(r0_atom, R0_atom)) r1_atom = np.einsum('p,xpa->ax', c, I1) # (atom, component) - assert(np.allclose(r1_atom.sum(axis=0), R1)) + assert (np.allclose(r1_atom.sum(axis=0), R1)) r0_atom, r1_atom, r2_atom = moments.r2_c(mol, c, per_atom=True) - assert(np.allclose(r0_atom, R0_atom)) - assert(np.allclose(r1_atom.sum(axis=0), R1)) - assert(np.allclose(r2_atom.sum(), R2)) + assert (np.allclose(r0_atom, R0_atom)) + assert (np.allclose(r1_atom.sum(axis=0), R1)) + assert (np.allclose(r2_atom.sum(), R2)) if __name__ == '__main__': diff --git a/tests/test_opt.py b/tests/test_opt.py index e861e700..f5f216bb 100755 --- a/tests/test_opt.py +++ b/tests/test_opt.py @@ -11,15 +11,16 @@ def test_hf_otpd(): mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) dm = fields.dm.get_converged_dm(mol, xc="pbe") - otpd, grid = fields.hf_otpd.hf_otpd(mol, dm, return_all = True) + otpd, grid = fields.hf_otpd.hf_otpd(mol, dm, return_all=True) mol_dict = {'atom': mol.atom, 'rho': otpd, 'coords': grid.coords, 'weights': grid.weights} g = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt', path+'/data/initial/O_N0.txt'], [mol_dict], check=True, printlvl=0) - assert(np.all(g['diff'] < 1e-6)) + 
assert (np.all(g['diff'] < 1e-6)) ob_good = {'H': [[0, [42.30256758622713, 1]], [0, [6.83662718701579, 1]], [0, [1.8547192742478775, 1]], [0, [0.3797283290452742, 1]], [1, [12.961663119622536, 1]], [1, [2.507400755551906, 1]], [1, [0.6648804678758861, 1]], [2, [3.482167705165484, 1]], [2, [0.6053728887614225, 1]], [3, [0.6284190712545101, 1]]]} ob = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt'], [path+'/data/H2.ccpvtz.grid3.npz'], printlvl=2, gtol_in=1e-5) for [_l,[a,_c]], [_l1,[a1,_c1]] in zip(ob_good['H'], ob['H'], strict=True): - assert(abs(a-a1)<1e-5) + assert (abs(a-a1)<1e-5) + if __name__ == '__main__': test_hf_otpd() diff --git a/tests/test_orca.py b/tests/test_orca.py index 7fbbbc36..99013cd9 100755 --- a/tests/test_orca.py +++ b/tests/test_orca.py @@ -32,9 +32,9 @@ def test_orca_density_reader(): dm421 = qstack.orcaio.read_density(mol, 'H2O.orca421', directory=path+'/data/orca/', version=421, openshell=True) dm504 = qstack.orcaio.read_density(mol, 'H2O.orca504', directory=path+'/data/orca/', version=504, openshell=True) - assert(np.linalg.norm(dm-dm400)<1e-4) - assert(np.linalg.norm(dm400-dm421)<1e-10) - assert(np.linalg.norm(dm504-dm421)<5e-3) + assert (np.linalg.norm(dm-dm400)<1e-4) + assert (np.linalg.norm(dm400-dm421)<1e-10) + assert (np.linalg.norm(dm504-dm421)<5e-3) def test_orca_gbw_reader(): @@ -45,6 +45,7 @@ def test_orca_gbw_reader(): c = mf.mo_coeff e = mf.mo_energy occ = mf.mo_occ + def compare_MO(c0, c1): for s in range(c0.shape[0]): for i in range(c0.shape[-1]): diff --git a/tests/test_regression.py b/tests/test_regression.py index bfb8b989..9702fa55 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -23,7 +23,7 @@ def test_hyperparameters(): [5.18262767e-01,3.00473746e-01,1.00000000e-10,3.16227766e+01], [5.10592542e-01,3.38247735e-01,1.00000000e+00,3.16227766e+01]] - assert(np.allclose(hyper, true_hyper)) + assert (np.allclose(hyper, true_hyper)) def test_regression(): @@ -38,7 +38,7 @@ def 
test_regression(): (6, 0.24018169400891018, 0.08584295185009833), (8, 0.2708852104417901, 7.021666937153402e-17)] - assert(np.allclose(lc, true_lc)) + assert (np.allclose(lc, true_lc)) def test_regression_sparse(): @@ -52,7 +52,7 @@ def test_regression_sparse(): (4, 0.4803773474666784, 0.19356070353924582), (6, 0.333707374435793, 0.13803898307368923), (8, 0.4501685644789055, 8.95090418262362e-17)] - assert(np.allclose(lc, true_lc)) + assert (np.allclose(lc, true_lc)) def test_regression_idx(): @@ -106,6 +106,7 @@ def test_oos(): pred3 = oos.oos(X, X[idx_train], weights, sigma=3.162278e+01, random_state=666) assert np.allclose(pred3, y[idx_train]) + def test_cross_validate_results(): path = os.path.dirname(os.path.realpath(__file__)) X = np.load(os.path.join(path, 'data/mols/X_lb.npy')) @@ -116,8 +117,7 @@ def test_cross_validate_results(): (4, 0.7336549 , 0.59839317), (6, 0.7288867 , 0.50714861), (8, 0.72604955, 0.48307486)] - assert(np.allclose(lc, true_lc)) - + assert (np.allclose(lc, true_lc)) if __name__ == '__main__': diff --git a/tests/test_reorder.py b/tests/test_reorder.py index 501f2dd2..1076814c 100755 --- a/tests/test_reorder.py +++ b/tests/test_reorder.py @@ -13,13 +13,13 @@ def test_reorder_pyscf_gpr(): dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') dm1 = reorder.reorder_ao(mol, dm, src='pyscf', dest='gpr') dm2 = reorder.reorder_ao(mol, dm1, src='gpr', dest='pyscf') - assert(np.linalg.norm(dm-dm2)==0) + assert (np.linalg.norm(dm-dm2)==0) auxmol = compound.make_auxmol(mol, 'cc-pvdz jkfit') c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') c1 = reorder.reorder_ao(auxmol, c, src='pyscf', dest='gpr') c2 = reorder.reorder_ao(auxmol, c1, src='gpr', dest='pyscf') - assert(np.linalg.norm(c-c2)==0) + assert (np.linalg.norm(c-c2)==0) def test_reorder_pyscf_gpr_orca(): @@ -30,19 +30,19 @@ def test_reorder_pyscf_gpr_orca(): dm_pyscf = from_tril(np.load(path+'/data/reorder/2_3FOD.pyscf.dm.npy')) dm_gpr1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'gpr') - 
assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) + assert (np.linalg.norm(dm_gpr1-dm_gpr)==0) dm_gpr1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'gpr') - assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) + assert (np.linalg.norm(dm_gpr1-dm_gpr)==0) dm_pyscf1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'pyscf') - assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) + assert (np.linalg.norm(dm_pyscf1-dm_pyscf)==0) dm_pyscf1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'pyscf') - assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) + assert (np.linalg.norm(dm_pyscf1-dm_pyscf)==0) dm_orca1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'orca') - assert(np.linalg.norm(dm_orca1-dm_orca)==0) + assert (np.linalg.norm(dm_orca1-dm_orca)==0) dm_orca1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'orca') - assert(np.linalg.norm(dm_orca1-dm_orca)==0) + assert (np.linalg.norm(dm_orca1-dm_orca)==0) if __name__ == '__main__': diff --git a/tests/test_rxn-repr.py b/tests/test_rxn-repr.py index d5381736..75e04c07 100755 --- a/tests/test_rxn-repr.py +++ b/tests/test_rxn-repr.py @@ -27,6 +27,7 @@ def read_mols(files): mol.set_positions(mol.positions*ase.units.Bohr) sub_mols.append(mol) return sub_mols + def get_data(): indices = np.loadtxt(idx_path, dtype=int) reactions = [] @@ -40,8 +41,12 @@ def get_data(): def test_b2r2_l(): _test_b2r2('l') + + def test_b2r2_a(): _test_b2r2('a') + + def test_b2r2_n(): _test_b2r2('n') @@ -51,7 +56,7 @@ def _test_b2r2(variant): reactions = Rxn_data(data_dir=data_dir).get_gdb7_data() b2r2_1 = b2r2.get_b2r2(reactions, variant=variant) b2r2_0 = np.load(f'{data_dir}/b2r2_{variant}.npy') - assert(np.linalg.norm(b2r2_1-b2r2_0) < 1e-10) + assert (np.linalg.norm(b2r2_1-b2r2_0) < 1e-10) def test_slatm_rxn(): @@ -59,7 +64,7 @@ def test_slatm_rxn(): reactions = Rxn_data(data_dir=data_dir).get_gdb7_data() slatm_1 = slatm.get_slatm_rxn(reactions, qml_mbtypes=True, progress=False) slatm_0 = np.load(f'{data_dir}/slatm_d.npy') - assert(np.linalg.norm(slatm_1-slatm_0) < 1e-10) + assert 
(np.linalg.norm(slatm_1-slatm_0) < 1e-10) if __name__ == '__main__': diff --git a/tests/test_slatm.py b/tests/test_slatm.py index c120d1cb..afa35107 100755 --- a/tests/test_slatm.py +++ b/tests/test_slatm.py @@ -11,7 +11,7 @@ def test_slatm_global(): v0 = np.load(f'{path}/data/slatm/slatm_global.npy') xyzs = sorted(glob.glob(f"{path}/data/slatm/*.xyz")) v = slatm.get_slatm_for_dataset(xyzs, progress=False, global_repr=True) - assert(np.linalg.norm(v-v0)<1e-10) + assert (np.linalg.norm(v-v0)<1e-10) def test_slatm_local(): @@ -19,7 +19,7 @@ def test_slatm_local(): v0 = np.load(f'{path}/data/slatm/slatm_local.npy') xyzs = sorted(glob.glob(f"{path}/data/slatm/*.xyz")) v = slatm.get_slatm_for_dataset(xyzs, progress=False) - assert(np.linalg.norm(v-v0)<1e-10) + assert (np.linalg.norm(v-v0)<1e-10) if __name__ == '__main__': diff --git a/tests/test_spahm.py b/tests/test_spahm.py index eb96619a..2990b9b0 100755 --- a/tests/test_spahm.py +++ b/tests/test_spahm.py @@ -13,8 +13,8 @@ def test_spahm_GWH(): R = compute_spahm.get_spahm_representation(mol, 'gwh') true_R = np.array([[-33.02835203, -8.92909895, -8.00935971, -7.51145492, -7.32962602], [-33.02835203, -8.92909895, -8.00935971, -7.51145492, 0. ]]) - assert(R.shape == (2,5)) - assert(np.allclose(R, true_R)) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_huckel(): @@ -23,8 +23,8 @@ def test_spahm_huckel(): R = compute_spahm.get_spahm_representation(mol, 'huckel') true_R = np.array([[-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531], [-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531]]) - assert(R.shape == (2,5)) - assert(np.allclose(R, true_R)) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_LB(): @@ -33,8 +33,8 @@ def test_spahm_LB(): R = compute_spahm.get_spahm_representation(mol, 'lb') true_R = np.array( [[-18.80209878, -1.28107468, -0.79949967, -0.63587071, -0.57481672], [-18.80209878, -1.28107468, -0.79949967, -0.63587071, 0. 
]]) - assert(R.shape == (2,5)) - assert(np.allclose(R, true_R)) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_LB_ecp(): @@ -74,7 +74,7 @@ def test_generate_reps(): xmols = [compute_spahm.get_spahm_representation(mol, 'lb')[0] for mol in mols] X = vstack_padding(xmols) Xtrue = np.load(os.path.join(path, 'X_lb.npy')) - assert(np.allclose(X, Xtrue)) + assert (np.allclose(X, Xtrue)) if __name__ == '__main__': diff --git a/tests/test_spahm_a.py b/tests/test_spahm_a.py index aeae5cb0..87142d53 100755 --- a/tests/test_spahm_a.py +++ b/tests/test_spahm_a.py @@ -7,12 +7,14 @@ PATH = os.path.dirname(os.path.realpath(__file__)) + def underlying_test(true_data_relpath, X): X_true = np.load(PATH+true_data_relpath, allow_pickle=True) - assert(X.shape == X_true.shape) + assert (X.shape == X_true.shape) for a, a_true in zip(X, X_true, strict=True): - assert(a[0] == a_true[0]) # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + assert (a[0] == a_true[0]) # atom type + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + def test_water(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) @@ -21,21 +23,24 @@ def test_water(): model='lowdin-long-x', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X) + def test_water_alternate(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) - #X = atom.get_repr(mol, ["H", "O"], None, dm=None, + # X = atom.get_repr(mol, ["H", "O"], None, dm=None, # guess='LB', model='lowdin-long-x', auxbasis='ccpvdzjkfit') X = atom.get_repr("atom", [mol], [PATH], 'LB', spin=[None], auxbasis='ccpvdzjkfit', with_symbols=True) underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X) + def test_water_lowdinshortx(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, 
model='lowdin-short-x', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short-x.npy', X) + def test_water_lowdinlong(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', @@ -43,48 +48,53 @@ def test_water_lowdinlong(): model='lowdin-long', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-long.npy', X) + def test_water_lowdinshort(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='lowdin-short', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', X) + def test_water_mr21(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='MR2021', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! 
underlying_test('/data/SPAHM_a_H2O/X_H2O_MR2021.npy', X) + def test_water_SAD_guess_open_shell(): - mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=1, spin=1) ## test breaks when effective open-shell caluclation is needed + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=1, spin=1) # test breaks when effective open-shell caluclation is needed Xsad = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=[1], with_symbols=True, - xc = 'hf', model='sad-diff', auxbasis='ccpvdzjkfit') + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy', Xsad) + def test_water_SAD_guess_close_shell(): - mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) ## test breaks when effective open-shell caluclation is needed + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) # test breaks when effective open-shell caluclation is needed Xsad = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=None, with_symbols=True, - xc = 'hf', model='sad-diff', auxbasis='ccpvdzjkfit') + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O_SAD.npy', Xsad) + def test_water_single_element(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O.npy', allow_pickle=True) # the next two lines deviate from the common template a = X[0] - assert(X.shape == np.array(X_true[0], ndmin=2).shape) + assert (X.shape == np.array(X_true[0], ndmin=2).shape) for a_true in X_true: if a[0] == a_true[0]: # atom type - 
assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations def test_water_single_element_short(): @@ -92,27 +102,27 @@ def test_water_single_element_short(): X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='lowdin-short', auxbasis='ccpvdzjkfit', only_z=['O']) - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', allow_pickle=True) a = X[0] - assert(X.shape == np.array(X_true[0], ndmin=2).shape) + assert (X.shape == np.array(X_true[0], ndmin=2).shape) for a_true in X_true: if a[0] == a_true[0]: # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations def test_water_single_element_SAD(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=None, with_symbols=True, - xc = 'hf', model='sad-diff', auxbasis='ccpvdzjkfit', only_z=['O']) - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit', only_z=['O']) + X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) # trimming is necessary to get the short-version vector ! 
X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O_SAD.npy', allow_pickle=True) a = X[0] - assert(X.shape == np.array(X_true[0], ndmin=2).shape) + assert (X.shape == np.array(X_true[0], ndmin=2).shape) for a_true in X_true: if a[0] == a_true[0]: # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + assert (np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations if __name__ == '__main__': diff --git a/tests/test_spahm_b.py b/tests/test_spahm_b.py index 552f00fe..9624ba9f 100755 --- a/tests/test_spahm_b.py +++ b/tests/test_spahm_b.py @@ -7,12 +7,14 @@ PATH = os.path.dirname(os.path.realpath(__file__)) + def underlying_test(X, truepath): true_file = PATH + truepath X_true = np.load(true_file) - assert(X_true.shape == X.shape) + assert (X_true.shape == X.shape) for Xa, Xa_true in zip(X, X_true, strict=True): - assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + assert (np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + def test_water(): xyz_in = PATH+'/data/H2O.xyz' @@ -21,45 +23,50 @@ def test_water(): underlying_test(X, '/data/H2O_spahm_b.npy_alpha_beta.npy') + def test_water_closed(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [None], [0], 'minao') X = bond.get_repr("bond", mols, [xyz_in], 'LB', spin=[None], with_symbols=False, same_basis=False) underlying_test(X, '/data/H2O_spahm_b.npy') + def test_water_O_only(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, only_z=['O']) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging 
alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main X_true = np.load(PATH+'/data/H2O_spahm_b.npy_alpha_beta.npy') X_true = X_true[0] # this line makes it incompatible with a call to underlying_test() - assert(X_true.shape == X.shape) + assert (X_true.shape == X.shape) for Xa, Xa_true in zip(X, X_true, strict=True): - assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + assert (np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + def test_water_same_basis(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, same_basis=True) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main underlying_test(X, '/data/H2O_spahm_b_CCbas.npy_alpha_beta.npy') + def test_ecp(): xyz_in = PATH+'/data/I2.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao', ecp='def2-svp') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, same_basis=True) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main underlying_test(X, 
'/data/I2_spahm-b_minao-def2-svp_alpha-beta.npy') + def test_repr_shapes(): xyz_in = [PATH+'/data/H2O.xyz', PATH+'/data/HO_spinline.xyz'] mols = utils.load_mols(xyz_in, [0,-1], [0,0], 'ccpvdz') @@ -102,7 +109,7 @@ def test_from_list(): mols = utils.load_mols(xyzlist, charges, spins, 'minao', srcdir=PATH+"/data/") spahm_b = bond.get_repr("bond", mols, xyzlist, 'LB', spin=spins, same_basis=True) Xtrue = np.load(PATH+'/data/list_H2O_spahm-b_minao_LB_alpha-beta.npy') - assert(np.allclose(Xtrue, spahm_b)) + assert (np.allclose(Xtrue, spahm_b)) if __name__ == '__main__': @@ -113,4 +120,3 @@ def test_from_list(): test_ecp() test_repr_shapes() test_from_list() - diff --git a/tests/test_spahm_b_selected.py b/tests/test_spahm_b_selected.py index 329d8245..c52b8e3b 100755 --- a/tests/test_spahm_b_selected.py +++ b/tests/test_spahm_b_selected.py @@ -5,6 +5,7 @@ from qstack import compound from qstack.spahm.rho.bond_selected import get_spahm_b_selected + def test_spahm_b_selected(): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data/') fname = os.path.join(path, 'H2O.xyz') @@ -12,7 +13,7 @@ def test_spahm_b_selected(): mols = [compound.xyz_to_mol(fname, basis='minao', charge=0, spin=0)] X = get_spahm_b_selected(mols, bondij, [fname])[0][1] Xtrue = np.load(os.path.join(path, 'H2O.xyz_1_2.npy')) - assert(np.allclose(X, Xtrue)) + assert (np.allclose(X, Xtrue)) if __name__ == '__main__': diff --git a/tests/test_spahm_grad.py b/tests/test_spahm_grad.py index 4114d500..77722c5f 100755 --- a/tests/test_spahm_grad.py +++ b/tests/test_spahm_grad.py @@ -46,7 +46,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_grad(): @@ -60,7 +60,7 @@ def spahm_re(r, mol, guess_in): agrad = 
spahm.compute_spahm.get_spahm_representation_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_re, mol, guess).reshape(mol.natm*3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_ev_grad_ecp(): @@ -74,7 +74,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_ev_grad_field(): @@ -89,7 +89,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess, field=field)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_grad_field(): @@ -105,7 +105,7 @@ def spahm_re(r, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess, field=field)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_re, mol, guess).reshape(mol.natm*3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_field_grad(): @@ -119,7 +119,7 @@ def spahm_re(field, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess, field=field)[2].reshape(-1, 3) ngrad = derivatives_num(field, spahm_re, mol, guess).reshape(3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) if __name__ == '__main__': diff --git a/tests/test_splitting.py b/tests/test_splitting.py index f14fb187..909a3cc4 100755 --- a/tests/test_splitting.py +++ b/tests/test_splitting.py @@ -11,19 +11,22 @@ spin_list = os.path.join(path, "data", 'list_water_spins.txt') charge_list = 
os.path.join(path, "data", 'list_water_charges.txt') + def test_no_split(): nameout = tempfile.mktemp() sufix = "_alpha_beta.npy" rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout]) reps = np.load(nameout+sufix) - assert(reps.shape == (9,414)) + assert (reps.shape == (9,414)) + def test_split_once(): nameout = tempfile.mktemp() sufix = "_alpha_beta.npy" rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout, '--split']) - reps = np.load(nameout+sufix, allow_pickle=True) ## why is the `dtype` object ???? - assert(reps.shape == (3, 3, 414)) + reps = np.load(nameout+sufix, allow_pickle=True) # why is the `dtype` object ???? + assert (reps.shape == (3, 3, 414)) + def test_split_twice(): nameout = tempfile.mktemp() @@ -31,8 +34,8 @@ def test_split_twice(): rep_files = [nameout+"_"+os.path.basename(f).split(".")[0]+sufix for f in np.loadtxt(mol_list, dtype=str)] rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout, '--split', "--split"]) for f in rep_files: - reps = np.load(f, allow_pickle=True) ## why is the `dtype` object ???? - assert(reps.shape == (3, 414)) + reps = np.load(f, allow_pickle=True) # why is the `dtype` object ???? 
+ assert (reps.shape == (3, 414)) if __name__ == '__main__': diff --git a/tests/test_utils.py b/tests/test_utils.py index a789a75f..81085a67 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,29 +12,32 @@ def test_load_rep_from_list(): path = os.path.dirname(os.path.realpath(__file__)) paths2list = os.path.join(path, 'data/SPAHM_a_H2O/') - Xarray, symbols = ut.load_reps(paths2list+'reps_list.txt', from_list=True, \ - with_labels=True, local=True, sum_local=False, printlevel=0, progress=True, \ + Xarray, symbols = ut.load_reps(paths2list+'reps_list.txt', from_list=True, + with_labels=True, local=True, sum_local=False, printlevel=0, progress=True, srcdir=paths2list) - assert(Xarray.shape == (9,207)) - assert(len(symbols) == 9) + assert (Xarray.shape == (9,207)) + assert (len(symbols) == 9) + def test_load_reps(): path = os.path.dirname(os.path.realpath(__file__)) paths2X = os.path.join(path, 'data/SPAHM_a_H2O/X_H2O.npy') - X, symbols = ut.load_reps(paths2X, from_list=False, \ + X, symbols = ut.load_reps(paths2X, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (3,207)) - assert(len(symbols) == 3) + assert (X.shape == (3,207)) + assert (len(symbols) == 3) + -def test_load_reps_nosymbols(): #throws warning and returns empty list of symbols +def test_load_reps_nosymbols(): # throws warning and returns empty list of symbols path = os.path.dirname(os.path.realpath(__file__)) paths2X = os.path.join(path, 'data/H2O_spahm_b.npy_alpha_beta.npy') - X, symbols = ut.load_reps(paths2X, from_list=False, \ + X, symbols = ut.load_reps(paths2X, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (3,1108)) - assert(len(symbols) == 0) + assert (X.shape == (3,1108)) + assert (len(symbols) == 0) + def test_load_reps_singleatom(): path = os.path.dirname(os.path.realpath(__file__)) @@ -44,13 +47,14 @@ def test_load_reps_singleatom(): mol = 
compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X, symbols = ut.load_reps(tmpfile, from_list=False, \ + X, symbols = ut.load_reps(tmpfile, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (1,414)) - assert(len(symbols) == 1) - assert(symbols[0] == 'O') + assert (X.shape == (1,414)) + assert (len(symbols) == 1) + assert (symbols[0] == 'O') + def test_load_reps_singleatom_sum_local(): path = os.path.dirname(os.path.realpath(__file__)) @@ -60,11 +64,12 @@ def test_load_reps_singleatom_sum_local(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X = ut.load_reps(tmpfile, from_list=False, \ + X = ut.load_reps(tmpfile, from_list=False, with_labels=False, local=True, sum_local=True, printlevel=0, progress=True) - assert(X.shape == (1,414)) + assert (X.shape == (1,414)) + def test_load_reps_singleatom_sum_local2(): path = os.path.dirname(os.path.realpath(__file__)) @@ -74,30 +79,33 @@ def test_load_reps_singleatom_sum_local2(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', 
only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X = ut.load_reps(tmpfile, from_list=False, \ + X = ut.load_reps(tmpfile, from_list=False, with_labels=False, local=True, sum_local=True, printlevel=0, progress=True) - assert(X.shape == (1,414)) + assert (X.shape == (1,414)) + def test_load_mols(): path = os.path.dirname(os.path.realpath(__file__)) molslist = [os.path.join(path, 'data', m) for m in ['H2O.xyz','H2O_dist.xyz','rotated_H2O.xyz']] mols = ut.load_mols(molslist, [0]*len(molslist), [None]*len(molslist), 'minao', progress=True) - assert(len(mols) == 3) + assert (len(mols) == 3) + def test_check_data_structure(): path = os.path.dirname(os.path.realpath(__file__)) test_files = [ - {'path2file': os.path.join(path, 'data', 'H2O_spahm-e_def2svp.npy'), 'is_local':False, 'is_single':True, 'is_labeled':False}, \ - {'path2file': os.path.join(path, 'data', 'H2O_spahm_b.npy_alpha_beta.npy'), 'is_local':True, 'is_single':True, 'is_labeled':False}, \ - {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/X_H2O.npy'), 'is_local':True, 'is_single':True, 'is_labeled':True}, \ - {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/Xs_H2O_array.npy'), 'is_local':True, 'is_single':False, 'is_labeled':True} \ + {'path2file': os.path.join(path, 'data', 'H2O_spahm-e_def2svp.npy'), 'is_local':False, 'is_single':True, 'is_labeled':False}, + {'path2file': os.path.join(path, 'data', 'H2O_spahm_b.npy_alpha_beta.npy'), 'is_local':True, 'is_single':True, 'is_labeled':False}, + {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/X_H2O.npy'), 'is_local':True, 'is_single':True, 'is_labeled':True}, + {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/Xs_H2O_array.npy'), 'is_local':True, 'is_single':False, 'is_labeled':True}, ] for ft in test_files: - is_single, is_labeled = ut.check_data_struct(ft['path2file'], local = ft['is_local']) - assert(ft['is_single'] == 
is_single) - assert(ft['is_labeled'] == is_labeled) + is_single, is_labeled = ut.check_data_struct(ft['path2file'], local=ft['is_local']) + assert (ft['is_single'] == is_single) + assert (ft['is_labeled'] == is_labeled) + def test_regroup_symbols(): path = os.path.dirname(os.path.realpath(__file__)) @@ -106,19 +114,20 @@ def test_regroup_symbols(): rep_count = {"H":2, "O":1} print(regrouped_species) for z,v in regrouped_species.items(): - assert(len(v) == rep_count[z]) + assert (len(v) == rep_count[z]) + def test_regroup_symbols_and_trim(): path = os.path.dirname(os.path.realpath(__file__)) filelist = os.path.join(path, "./data/list_water_lowdin-short-padded.txt") regrouped_species = ut.regroup_symbols(filelist, trim_reps=True) - #trimedlist = os.path.join(path, "./data/list_water_lowdin-short.txt") ## this is not possible because of inhomogenous array + # trimedlist = os.path.join(path, "./data/list_water_lowdin-short.txt") ## this is not possible because of inhomogenous array X_truth = np.load(path+"/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy", allow_pickle=True) regrouped_truth = {z:[] for z in regrouped_species} for z,v in X_truth: regrouped_truth[z].append(v) for z in regrouped_species: - assert(np.allclose(regrouped_species[z], regrouped_truth[z])) + assert (np.allclose(regrouped_species[z], regrouped_truth[z])) if __name__ == '__main__': From e068086f9de5232e1bd506ac0b20d50052b7371b Mon Sep 17 00:00:00 2001 From: Ksenia Briling <30023616+briling@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:47:01 +0100 Subject: [PATCH 23/23] Fix default branch name in lint workflow --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 89cb33f6..e063dd51 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,7 +3,7 @@ name: Lint with Ruff on: push: branches: - - main + - master pull_request: jobs: lint: