diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index b72f4c12..6d2dcf78 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -43,7 +43,6 @@ def energy(x): E += qbbt.energy_mol(newbasis, m) return E - def gradient(x): """Compute total loss function (fitting error) and gradient for given exponents. @@ -76,7 +75,6 @@ def gradient(x): dE_dx = dE_da * exponents return E, dE_dx - def gradient_only(x): """Compute only the gradient of the loss function (wrapper for optimization algorithms). @@ -88,7 +86,6 @@ def gradient_only(x): """ return gradient(x)[1] - def read_bases(basis_files): """Read basis set definitions from files or dicts. @@ -117,7 +114,6 @@ def read_bases(basis_files): basis.update(i) return basis - def make_bf_start(): """Create basis function index bounds for each element. @@ -131,7 +127,6 @@ def make_bf_start(): bf_bounds[q] = [start, start+nbf[i]] return bf_bounds - def make_moldata(fname): """Create molecular data dictionary from file or dict. diff --git a/qstack/compound.py b/qstack/compound.py index 8fc8f4b7..f5150f41 100644 --- a/qstack/compound.py +++ b/qstack/compound.py @@ -9,6 +9,7 @@ from qstack.reorder import get_mrange from qstack.mathutils.array import stack_padding from qstack.mathutils.rotation_matrix import rotate_euler +from qstack.tools import Cursor # detects a charge-spin line, containing only two ints (one positive or negative, the other positive and nonzero) @@ -319,7 +320,7 @@ def singleatom_basis_enumerator(basis): ao_starts = [] l_per_bas = [] n_per_bas = [] - cursor = 0 + cursor = Cursor(action='ranger') cursor_per_l = [] for bas in basis: # shape of `bas`, l, then another optional constant, then lists [exp, coeff, coeff, coeff] @@ -327,41 +328,41 @@ def singleatom_basis_enumerator(basis): # and the number of primitive gaussians (one per list) l = bas[0] while len(cursor_per_l) <= l: - cursor_per_l.append(0) - + cursor_per_l.append(Cursor(action='ranger')) n_count = len(bas[-1])-1 - n_start = cursor_per_l[l] - cursor_per_l[l] += n_count - l_per_bas += [l] * n_count - n_per_bas.extend(range(n_start, n_start+n_count)) + n_per_bas.extend(cursor_per_l[l].add(n_count)) msize = 2*l+1 - ao_starts.extend(range(cursor, cursor+msize*n_count, msize)) - cursor += msize*n_count + ao_starts.extend(cursor.add(msize*n_count)[::msize]) return l_per_bas, n_per_bas, ao_starts -def basis_flatten(mol, return_both=True): +def basis_flatten(mol, return_both=True, return_shells=False): """Flatten a basis set definition for AOs. Args: mol (pyscf.gto.Mole): pyscf Mole object. return_both (bool): Whether to return both AO info and primitive Gaussian info. Defaults to True. + return_shells (bool): Whether to return angular momenta per shell. Defaults to False. Returns: - numpy.ndarray: 3×mol.nao int array where each column corresponds to an AO and rows are: - - 0: atom index - - 1: angular momentum quantum number l - - 2: magnetic quantum number m + - 0: atom index + - 1: angular momentum quantum number l + - 2: magnetic quantum number m If return_both is True, also returns: - numpy.ndarray: 2×mol.nao×max_n float array where index (i,j,k) means: - - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian - - j: AO index - - k: radial function index (padded with zeros if necessary) + - i: 0 for exponent, 1 for contraction coefficient of a primitive Gaussian + - j: AO index + - k: radial function index (padded with zeros if necessary) + If return_shell is True, also returns: + - numpy.ndarray: angular momentum quantum number for each shell + """ x = [] + L = [] y = np.zeros((3, mol.nao), dtype=int) - i = 0 + i = Cursor(action='slicer') a = mol.bas_exps() for iat in range(mol.natm): for bas_id in mol.atom_shell_ids(iat): @@ -373,11 +374,13 @@ def basis_flatten(mol, return_both=True): for c in cs.T: ac = np.array([a[bas_id], c]) x.extend([ac]*msize) - y[:2,i:i+msize*n] = np.array([[iat, l]]*msize*n).T - y[2,i:i+msize*n] = [*get_mrange(l)]*n - i += msize*n + y[:,i(msize*n)] = np.vstack((np.array([[iat, l]]*msize*n).T, [*get_mrange(l)]*n)) + if return_shells: + L.extend([l]*n) + + ret = [y] if return_both: - x = stack_padding(x).transpose((1,0,2)) - return y, x - else: - return y + ret.append(stack_padding(x).transpose((1,0,2))) + if return_shells: + ret.append(np.array(L)) + return ret[0] if len(ret)==1 else ret diff --git a/qstack/constants.py b/qstack/constants.py index 5f3affc2..c9085d51 100644 --- a/qstack/constants.py +++ b/qstack/constants.py @@ -3,10 +3,13 @@ https://physics.nist.gov/cuu/Constants/ https://physics.nist.gov/cuu/Constants/Table/allascii.txt """ +import math + + # Constants SPEED_LIGHT = 299792458.0 PLANCK = 6.62607004e-34 -HBAR = PLANCK/(2*3.141592653589793) +HBAR = PLANCK/(2*math.pi) FUND_CHARGE = 1.6021766208e-19 MOL_NA = 6.022140857e23 MASS_E = 9.10938356e-31 @@ -20,4 +23,4 @@ BOHR2ANGS = 0.52917721092 # Angstroms HARTREE2J = HBAR**2/(MASS_E*(BOHR2ANGS*1e-10)**2) HARTREE2EV = 27.21138602 -AU2DEBYE = FUND_CHARGE * BOHR2ANGS*1e-10 / DEBYE # 2.541746 +AU2DEBYE = FUND_CHARGE * BOHR2ANGS*1e-10 / DEBYE # 2.541746 diff --git a/qstack/equio.py b/qstack/equio.py index ebec9a2e..35d2adf2 100644 --- a/qstack/equio.py +++ b/qstack/equio.py @@ -6,7 +6,8 @@ import numpy as np from pyscf import data import metatensor -from qstack.reorder import get_mrange +from qstack.tools import Cursor +from qstack.reorder import get_mrange, pyscf2gpr_l1_order from qstack.compound import singleatom_basis_enumerator @@ -26,8 +27,6 @@ _molid_name = 'mol_id' -_pyscf2gpr_l1_order = [1,2,0] - def _get_llist(mol): """Get list of angular momentum quantum numbers for basis functions of each element of a molecule. @@ -50,7 +49,7 @@ def _get_tsize(tensor): Returns: int: Total size of the tensor (total number of elements). """ - return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys]) + return sum(np.prod(tensor.block(key).values.shape) for key in tensor.keys) def _labels_to_array(labels): @@ -115,26 +114,24 @@ def vector_to_tensormap(mol, c): # Fill in the blocks iq = dict.fromkeys(llists.keys(), 0) - i = 0 + i = Cursor(action='slicer') for q in atom_charges: if llists[q]==sorted(llists[q]): for l in set(llists[q]): msize = 2*l+1 - nsize = blocks[(l,q)].shape[-1] - cslice = c[i:i+nsize*msize].reshape(nsize,msize).T + nsize = blocks[l,q].shape[-1] + cslice = c[i(nsize*msize)].reshape(nsize,msize).T if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) - cslice = cslice[_pyscf2gpr_l1_order] - blocks[(l,q)][iq[q],:,:] = cslice - i += msize*nsize + cslice = cslice[pyscf2gpr_l1_order] + blocks[l,q][iq[q],:,:] = cslice else: il = dict.fromkeys(range(max(llists[q]) + 1), 0) for l in llists[q]: msize = 2*l+1 - cslice = c[i:i+msize] + cslice = c[i(msize)] if l==1: # for l=1, the pyscf order is x,y,z (1,-1,0) - cslice = cslice[_pyscf2gpr_l1_order] - blocks[(l,q)][iq[q],:,il[l]] = cslice - i += msize + cslice = cslice[pyscf2gpr_l1_order] + blocks[l,q][iq[q],:,il[l]] = cslice il[l] += 1 iq[q] += 1 @@ -242,48 +239,44 @@ def matrix_to_tensormap(mol, dm): if all(llists[q]==sorted(llists[q]) for q in llists): iq1 = dict.fromkeys(elements, 0) - i1 = 0 + i1 = Cursor(action='slicer') for iat1, q1 in enumerate(atom_charges): for l1 in set(llists[q1]): msize1 = 2*l1+1 nsize1 = llists[q1].count(l1) iq2 = dict.fromkeys(elements, 0) - i2 = 0 + i1.add(nsize1*msize1) + i2 = Cursor(action='slicer') for iat2, q2 in enumerate(atom_charges): for l2 in set(llists[q2]): msize2 = 2*l2+1 nsize2 = llists[q2].count(l2) - dmslice = dm[i1:i1+nsize1*msize1,i2:i2+nsize2*msize2].reshape(nsize1,msize1,nsize2,msize2) + dmslice = dm[i1(),i2(nsize2*msize2)].reshape(nsize1,msize1,nsize2,msize2) dmslice = np.transpose(dmslice, axes=[1,3,0,2]).reshape(msize1,msize2,-1) block = tensor_blocks[tm_label_vals.index((l1,l2,q1,q2))] at_p = block.samples.position((iat1,iat2)) - blocks[(l1,l2,q1,q2)][at_p,:,:,:] = dmslice - i2 += msize2*nsize2 + blocks[l1,l2,q1,q2][at_p,:,:,:] = dmslice iq2[q2] += 1 - i1 += msize1*nsize1 iq1[q1] += 1 else: iq1 = dict.fromkeys(elements, 0) - i1 = 0 + i1 = Cursor(action='slicer') for iat1, q1 in enumerate(atom_charges): il1 = dict.fromkeys(range(max(llists[q1]) + 1), 0) for l1 in llists[q1]: - msize1 = 2*l1+1 + i1.add(2*l1+1) iq2 = dict.fromkeys(elements, 0) - i2 = 0 + i2 = Cursor(action='slicer') for iat2, q2 in enumerate(atom_charges): il2 = dict.fromkeys(range(max(llists[q2]) + 1), 0) for l2 in llists[q2]: - msize2 = 2*l2+1 - dmslice = dm[i1:i1+msize1,i2:i2+msize2] + dmslice = dm[i1(),i2(2*l2+1)] block = tensor_blocks[tm_label_vals.index((l1, l2, q1, q2))] at_p = block.samples.position((iat1, iat2)) n_p = block.properties.position((il1[l1], il2[l2])) - blocks[(l1,l2,q1,q2)][at_p,:,:,n_p] = dmslice - i2 += msize2 + blocks[l1,l2,q1,q2][at_p,:,:,n_p] = dmslice il2[l2] += 1 iq2[q2] += 1 - i1 += msize1 il1[l1] += 1 iq1[q1] += 1 @@ -291,9 +284,9 @@ def matrix_to_tensormap(mol, dm): for key in blocks: l1,l2 = key[:2] if l1==1: - blocks[key] = np.ascontiguousarray(blocks[key][:,_pyscf2gpr_l1_order,:,:]) + blocks[key] = np.ascontiguousarray(blocks[key][:,pyscf2gpr_l1_order,:,:]) if l2==1: - blocks[key] = np.ascontiguousarray(blocks[key][:,:,_pyscf2gpr_l1_order,:]) + blocks[key] = np.ascontiguousarray(blocks[key][:,:,pyscf2gpr_l1_order,:]) # Build tensor map tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals] @@ -492,7 +485,7 @@ def split(tensor): continue sampleidx = [t[0] for t in samples] samplelbl = [t[1] for t in samples] - #sampleidx = [block.samples.position(lbl) for lbl in samplelbl] + # sampleidx = [block.samples.position(lbl) for lbl in samplelbl] blocks[key] = block.values[sampleidx] block_samp_labels[key] = metatensor.Labels(tensor.sample_names[1:], np.array(samplelbl)[:,1:]) diff --git a/qstack/fields/dm.py b/qstack/fields/dm.py index 3b0140d3..90651f13 100644 --- a/qstack/fields/dm.py +++ b/qstack/fields/dm.py @@ -1,8 +1,9 @@ """Density matrix manipulation and analysis functions.""" +import numpy as np from pyscf import dft from qstack import constants -import numpy as np +from qstack.tools import Cursor def get_converged_mf(mol, xc, dm0=None, verbose=False): @@ -79,27 +80,26 @@ def sphericalize_density_matrix(mol, dm): A numpy ndarray with the sphericalized density matrix. """ idx_by_l = [[] for i in range(constants.MAX_L)] - i0 = 0 + i0 = Cursor(action='ranger') for ib in range(mol.nbas): l = mol.bas_angular(ib) + msize = 2*l+1 nc = mol.bas_nctr(ib) - i1 = i0 + nc * (l*2+1) - idx_by_l[l].extend(range(i0, i1, l*2+1)) - i0 = i1 + idx_by_l[l].extend(i0(nc*msize)[::msize]) spherical_dm = np.zeros_like(dm) for l in np.nonzero(idx_by_l)[0]: + msize = 2*l+1 for idx in idx_by_l[l]: for jdx in idx_by_l[l]: if l == 0: spherical_dm[idx,jdx] = dm[idx,jdx] else: trace = 0 - for m in range(2*l+1): + for m in range(msize): trace += dm[idx+m,jdx+m] - for m in range(2*l+1): - spherical_dm[idx+m,jdx+m] = trace / (2*l+1) + for m in range(msize): + spherical_dm[idx+m,jdx+m] = trace / msize return spherical_dm - diff --git a/qstack/fields/dori.py b/qstack/fields/dori.py index 31f81813..8a1d4583 100644 --- a/qstack/fields/dori.py +++ b/qstack/fields/dori.py @@ -44,7 +44,7 @@ def eval_rho_dm(mol, ao, dm, deriv=2): if deriv==2: DM_dAO_dr_i = 2 * _dot_ao_dm(mol, dAO_dr[i], dm, None, None, None) for j in range(i, 3): - d2rho_dr2[i,j] = _contract_rho(dAO_dr[j], DM_dAO_dr_i) + 2.0*np.einsum('ip,ip->i', d2AO_dr2[triu_idx[(i,j)]], DM_AO) + d2rho_dr2[i,j] = _contract_rho(dAO_dr[j], DM_dAO_dr_i) + 2.0*np.einsum('ip,ip->i', d2AO_dr2[triu_idx[i,j]], DM_AO) d2rho_dr2[j,i] = d2rho_dr2[i,j] if deriv==1: diff --git a/qstack/fields/excited.py b/qstack/fields/excited.py index c52bb771..868a323e 100644 --- a/qstack/fields/excited.py +++ b/qstack/fields/excited.py @@ -120,7 +120,7 @@ def exciton_properties_dm(mol, hole, part): dist = np.linalg.norm(hole_r-part_r) hole_extent = np.sqrt(hole_r2-hole_r@hole_r) part_extent = np.sqrt(part_r2-part_r@part_r) - return(dist, hole_extent, part_extent) + return dist, hole_extent, part_extent def exciton_properties(mol, hole, part): diff --git a/qstack/mathutils/array.py b/qstack/mathutils/array.py index 484c8717..9647126e 100644 --- a/qstack/mathutils/array.py +++ b/qstack/mathutils/array.py @@ -1,6 +1,7 @@ """Array manipulation utility functions.""" import numpy as np +from qstack.tools import slice_generator def scatter(values, indices): @@ -89,9 +90,7 @@ def vstack_padding(xs): if len(np.unique(shapes_other_axes, axis=0))==1: return np.vstack(xs) X = np.zeros((shapes_common_axis.sum(), *shapes_other_axes.max(axis=0))) - idx = 0 - for x in xs: - slices = (np.s_[idx:idx+x.shape[0]], *(np.s_[0:s] for s in x.shape[1:])) + for x, s0 in slice_generator(xs, inc=lambda x: x.shape[0]): + slices = (s0, *(np.s_[0:s] for s in x.shape[1:])) X[slices] = x - idx += x.shape[0] return X diff --git a/qstack/mathutils/wigner.py b/qstack/mathutils/wigner.py index dc5037a8..7b822945 100755 --- a/qstack/mathutils/wigner.py +++ b/qstack/mathutils/wigner.py @@ -56,7 +56,7 @@ def get_polynom_Y(l, m): r = Symbol('r', nonnegative=True) expr = real_Y_correct_phase(l,m, theta, phi) * r**l expr = expand(expr, func=True) - expr = expr.rewrite(sp.cos)#.simplify().trigsimp() + expr = expr.rewrite(sp.cos) # .simplify().trigsimp() expr = expand_trig(expr) expr = cancel(expr) expr = expr.subs({r: sp.sqrt(x*x+y*y+z*z), phi: sp.atan2(y,x), theta: sp.atan2(sp.sqrt(x*x+y*y),z)}) @@ -139,7 +139,6 @@ def compute_wigner(lmax): # rotated spherical harmonic Y_rot[l][m] = Y[l][m].subs({x: x1, y:y1, z:z1}).subs({x1:xx*x+xy*y+xz*z, y1:yx*x+yy*y+yz*z, z1:zx*x+zy*y+zz*z}) - D = [zeros(2*l+1,2*l+1) for l in range(lmax+1)] integrals_xyz_dict = {} for l in range(lmax+1): @@ -159,4 +158,3 @@ def compute_wigner(lmax): D = compute_wigner(lmax) print_wigner(D) - diff --git a/qstack/mathutils/xyz_integrals_float.py b/qstack/mathutils/xyz_integrals_float.py index 26374f57..f0b4b2f4 100755 --- a/qstack/mathutils/xyz_integrals_float.py +++ b/qstack/mathutils/xyz_integrals_float.py @@ -70,4 +70,3 @@ def trinomial(k1, k2, k3): if __name__ == "__main__": k,n,m = map(int, sys.argv[1:4]) print(f"{xyz(k,n,m):.15f} π") - diff --git a/qstack/orcaio.py b/qstack/orcaio.py index 9f5a667b..a39ade1c 100644 --- a/qstack/orcaio.py +++ b/qstack/orcaio.py @@ -9,6 +9,7 @@ import pyscf from qstack.mathutils.matrix import from_tril from qstack.reorder import reorder_ao +from qstack.tools import Cursor def read_input(fname, basis, ecp=None): @@ -90,7 +91,6 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re else: dm = np.fromfile(path[0], offset=8, count=mol.nao*mol.nao*nspin).reshape((nspin,mol.nao,mol.nao)) - is_def2 = 'def2' in pyscf.gto.basis._format_basis_name(mol.basis) has_3d = np.any([21 <= pyscf.data.elements.charge(q) <= 30 for q in mol.elements]) if is_def2 and has_3d: @@ -206,10 +206,9 @@ def _get_indices(mol, ls_from_orca): indices_full = np.arange(mol.nao) for iat, ls in ls_from_orca.items(): indices = [] - i = 0 + i = Cursor(action='ranger') for il, l in enumerate(ls): - indices.append((l, il, i + np.arange(2*l+1))) - i += 2*l+1 + indices.append((l, il, i(2*l+1))) indices = sorted(indices, key=lambda x: (x[0], x[1])) indices = np.array([j for i in indices for j in i[2]]) atom_slice = np.s_[ao_limits[iat][0]:ao_limits[iat][1]] @@ -277,4 +276,3 @@ def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): if reorder_dest is not None: reorder_coeff_inplace(c, mol, reorder_dest, ls if (ls and sort_l) else None) return c, e, occ - diff --git a/qstack/qml/b2r2.py b/qstack/qml/b2r2.py index 2e8a1ed7..f3db3d7c 100644 --- a/qstack/qml/b2r2.py +++ b/qstack/qml/b2r2.py @@ -177,7 +177,7 @@ def get_b2r2_a_molecular(ncharges, coords, elements, coords_b = coords[j] R = np.linalg.norm(coords_b - coords_a) if R < rcut: - twobodyrep[bag_idx[(ncharge_a, ncharge_b)]] += get_gaussian(grid, R) + twobodyrep[bag_idx[ncharge_a, ncharge_b]] += get_gaussian(grid, R) twobodyrep = 2.0*np.concatenate(twobodyrep) return twobodyrep diff --git a/qstack/regression/condition.py b/qstack/regression/condition.py index 7d094de7..5beefa97 100644 --- a/qstack/regression/condition.py +++ b/qstack/regression/condition.py @@ -67,7 +67,7 @@ def main(): """Command-line entry point for computing kernel matrix condition numbers.""" args = _get_arg_parser().parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) c = condition(X, read_kernel=args.readk, sigma=args.sigma, eta=args.eta, diff --git a/qstack/regression/cross_validate_results.py b/qstack/regression/cross_validate_results.py index 8f4a0ca7..2d342708 100644 --- a/qstack/regression/cross_validate_results.py +++ b/qstack/regression/cross_validate_results.py @@ -79,8 +79,6 @@ def cv_results(X, y, np.save(f"{preffix}_{n_rep}-lc-runs.npy", lc_runs) if save_pred: np_pred = np.array(predictions_n) - ##### Can not take means !!! Test-set varies with run ! - ##### pred_mean = np.concatenate([np_pred.mean(axis=0),np_pred.std(axis=0)[1].reshape((1,-1))], axis=0) pred_mean = np.concatenate([*np_pred.reshape((n_rep, 2, -1))], axis=0) np.savetxt(f"{preffix}_{n_rep}-predictions.txt", pred_mean.T) return lc @@ -99,14 +97,14 @@ def _get_arg_parser(): def main(): """Command-line entry point for full cross-validation with hyperparameter search.""" args = _get_arg_parser().parse_args() - if(args.readk): + if args.readk: args.sigma = [np.nan] - if(args.ll): + if args.ll: correct_num_threads() + print(vars(args)) X = np.load(args.repr) y = np.loadtxt(args.prop) - print(vars(args)) final = cv_results(X, y, sigmaarr=args.sigma, etaarr=args.eta, gdict=args.gdict, gkernel=args.gkernel, akernel=args.akernel, read_kernel=args.read_kernel, diff --git a/qstack/regression/final_error.py b/qstack/regression/final_error.py index 2f0fabc6..039c6cdd 100644 --- a/qstack/regression/final_error.py +++ b/qstack/regression/final_error.py @@ -87,7 +87,7 @@ def main(): """Command-line entry point for computing final prediction errors.""" args = _get_arg_parser().parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/regression/global_kernels.py b/qstack/regression/global_kernels.py index 86182618..7b771af1 100644 --- a/qstack/regression/global_kernels.py +++ b/qstack/regression/global_kernels.py @@ -8,6 +8,7 @@ from collections import Counter import numpy as np from tqdm import tqdm +from qstack.tools import slice_generator def get_global_K(X, Y, sigma, local_kernel, global_kernel, options): @@ -83,18 +84,15 @@ def get_covariance(mol1, mol2, species, max_atoms, max_size, kernel, sigma=None) numpy ndarray: Covariance matrix of shape (max_size, max_size). """ K_covar = np.zeros((max_size, max_size)) - idx = 0 - for q in species: + for q, slice_ in slice_generator(species, inc=lambda q: max_atoms[q]): n1 = len(mol1[q]) n2 = len(mol2[q]) q_size = max_atoms[q] if n1==0 or n2==0: - idx += q_size continue x1 = np.pad(mol1[q], ((0, q_size - n1),(0,0)), 'constant') x2 = np.pad(mol2[q], ((0, q_size - n2),(0,0)), 'constant') - K_covar[idx:idx+q_size, idx:idx+q_size] = kernel(x1, x2, sigma) - idx += q_size + K_covar[slice_, slice_] = kernel(x1, x2, sigma) return K_covar diff --git a/qstack/regression/hyperparameters.py b/qstack/regression/hyperparameters.py index 4f1ec843..c762b388 100644 --- a/qstack/regression/hyperparameters.py +++ b/qstack/regression/hyperparameters.py @@ -125,9 +125,9 @@ def hyper_loop(sigma, eta): # at the 1st iteration if is checked twice on purpose if direction=='up' and best_sigma==max(work_sigma): - new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) + new_sigma = best_sigma*np.array(defaults.sigmaarr_mult[1:]) elif direction=='down' and best_sigma==min(work_sigma): - new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) + new_sigma = best_sigma/np.array(defaults.sigmaarr_mult[1:]) if new_sigma is None: break @@ -147,11 +147,11 @@ def _get_arg_parser(): def main(): """Command-line entry point for hyperparameter optimization.""" args = _get_arg_parser().parse_args() - if(args.readk): + if args.readk: args.sigma = [np.nan] - print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() + print(vars(args)) X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/regression/kernel.py b/qstack/regression/kernel.py index b5d697be..56e12960 100644 --- a/qstack/regression/kernel.py +++ b/qstack/regression/kernel.py @@ -46,7 +46,7 @@ def main(): """Command-line entry point for computing kernel matrices.""" args = _get_arg_parser().parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() if os.path.isfile(args.repr): X = np.load(args.repr) diff --git a/qstack/regression/local_kernels.py b/qstack/regression/local_kernels.py index f1d746d8..c17b7158 100644 --- a/qstack/regression/local_kernels.py +++ b/qstack/regression/local_kernels.py @@ -205,7 +205,7 @@ def local_laplacian_kernel_wrapper(X, Y, gamma): X, Y = np.asarray(X), np.asarray(Y) if X.shape[1:] != Y.shape[1:]: raise RuntimeError(f"Incompatible shapes {X.shape} and {Y.shape}") - if X.ndim==1: # do not extend so the behavior is the same for 'L' and 'L_custom_py' + if X.ndim==1: # do not extend so the behavior is the same for 'L' and 'L_custom_py' raise RuntimeError("Dimensionality of X should be > 1") if X.ndim>2: diff --git a/qstack/regression/oos.py b/qstack/regression/oos.py index 55090c41..8f203919 100644 --- a/qstack/regression/oos.py +++ b/qstack/regression/oos.py @@ -59,7 +59,7 @@ def main(): """Command-line entry point for out-of-sample predictions.""" args = _get_arg_parser().parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) X_oos = np.load(args.x_oos) diff --git a/qstack/regression/regression.py b/qstack/regression/regression.py index 73b24bd0..8c6d68a0 100644 --- a/qstack/regression/regression.py +++ b/qstack/regression/regression.py @@ -58,7 +58,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, else: if read_kernel: raise RuntimeError('Cannot do FPS with kernels') - sparse_idx = do_fps(X_train)[0][:sparse] # indices within the training set + sparse_idx = do_fps(X_train)[0][:sparse] # indices within the training set if debug: # Ensures reproducibility of the sample selection for each train_size over repetitions (n_rep) @@ -71,7 +71,7 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, size_train = int(np.floor(len(y_train)*size)) if size <= 1.0 else size maes = [] for _rep in range(n_rep): - train_idx = rng.choice(all_indices_train, size = size_train, replace=False) + train_idx = rng.choice(all_indices_train, size=size_train, replace=False) y_kf_train = y_train[train_idx] if not sparse: @@ -102,7 +102,7 @@ def main(): """Command-line entry point for computing learning curves.""" args = _get_arg_parser().parse_args() print(vars(args)) - if(args.ll): + if args.ll: correct_num_threads() X = np.load(args.repr) y = np.loadtxt(args.prop) diff --git a/qstack/reorder.py b/qstack/reorder.py index 0b68bf50..5c50ba75 100644 --- a/qstack/reorder.py +++ b/qstack/reorder.py @@ -1,6 +1,14 @@ -"""Functions for reordering atomic orbitals between different conventions.""" +"""Functions for reordering atomic orbitals between different conventions. + +Provides: + pyscf2gpr_l1_order: indices to reorder l=1 orbitals from PySCF to GPR. +""" import numpy as np +from .tools import slice_generator + + +pyscf2gpr_l1_order = [1,2,0] def get_mrange(l): @@ -21,7 +29,7 @@ def get_mrange(l): return range(-l,l+1) -def _orca2gpr_idx(l, m): +def _orca2gpr_idx(l_slices, m): """Given a molecule returns a list of reordered indices to tranform Orca AO ordering into SA-GPR. In Orca, orbital ordering corresponds to: @@ -31,26 +39,23 @@ def _orca2gpr_idx(l, m): Additionally, Orca uses a different sign convention for |m|>=3. Args: - l (np.ndarray): Array of angular momentum quantum numbers. - m (np.ndarray): Array of magnetic quantum numbers. + l_slices (iterator): Iterator that yeilds (l: int, s: slice) per shell, where + l is angular momentum quantum number and s is the corresponding slice of size 2*l+1. + m (np.ndarray): Array of magnetic quantum numbers per AO. Returns: tuple: Re-arranged indices array and sign array. """ - idx = np.arange(len(l)) - i=0 - while(i < len(idx)): - msize = 2*l[i]+1 - j = np.s_[i:i+msize] - idx[j] = np.concatenate((idx[j][::-2], idx[j][1::2])) - i += msize + idx = np.arange(len(m)) + for _l, s in l_slices: + idx[s] = np.concatenate((idx[s][::-2], idx[s][1::2])) signs = np.ones_like(idx) signs[np.where(np.abs(m)>=3)] = -1 # in pyscf order - signs[idx] = signs # in orca order + signs[idx] = signs # in orca order return idx, signs -def _pyscf2gpr_idx(l): +def _pyscf2gpr_idx(l_slices, m): """Given a molecule returns a list of reordered indices to tranform pyscf AO ordering into SA-GPR. In SA-GPR, orbital ordering corresponds to: @@ -60,18 +65,17 @@ def _pyscf2gpr_idx(l): Signs are the same in both conventions, so they are returned for compatibility. Args: - l (np.ndarray): Array of angular momentum quantum numbers. + l_slices (iterator): Iterator that yeilds (l: int, s: slice) per shell, where + l is angular momentum quantum number and s is the corresponding slice of size 2*l+1. + m (np.ndarray): Array of magnetic quantum numbers per AO. Returns: tuple: Re-arranged indices array and sign array. """ - idx = np.arange(len(l)) - i=0 - while(i < len(idx)): - msize = 2*l[i]+1 - if l[i]==1: - idx[i:i+3] = [i+1,i+2,i] - i += msize + idx = np.arange(len(m)) + for l, s in l_slices: + if l==1: + idx[s] = idx[s][pyscf2gpr_l1_order] return idx, np.ones_like(idx) @@ -94,23 +98,24 @@ def reorder_ao(mol, vector, src='pyscf', dest='gpr'): NotImplementedError: If the specified convention is not implemented. ValueError: If vector dimension is not 1 or 2. """ - def get_idx(l, m, convention): + def get_idx(L, m, convention): convention = convention.lower() + l_slices = slice_generator(L, inc=lambda l: 2*l+1) if convention == 'gpr': - return np.arange(len(l)), np.ones_like(l) + return np.arange(len(m)), np.ones_like(m) elif convention == 'pyscf': - return _pyscf2gpr_idx(l) + return _pyscf2gpr_idx(l_slices, m) elif convention == 'orca': - return _orca2gpr_idx(l, m) + return _orca2gpr_idx(l_slices, m) else: errstr = f'Conversion to/from the {convention} convention is not implemented' raise NotImplementedError(errstr) from .compound import basis_flatten - _, l, m = basis_flatten(mol, return_both=False) - idx_src, sign_src = get_idx(l, m, src) - idx_dest, sign_dest = get_idx(l, m, dest) + (_, _, m), L = basis_flatten(mol, return_both=False, return_shells=True) + idx_src, sign_src = get_idx(L, m, src) + idx_dest, sign_dest = get_idx(L, m, dest) if vector.ndim == 2: sign_src = np.einsum('i,j->ij', sign_src, sign_src) @@ -126,4 +131,3 @@ def get_idx(l, m, convention): newvector *= sign_dest return newvector - diff --git a/qstack/spahm/LB2020guess.py b/qstack/spahm/LB2020guess.py index 4082ad6b..fbf072c7 100644 --- a/qstack/spahm/LB2020guess.py +++ b/qstack/spahm/LB2020guess.py @@ -22,7 +22,6 @@ def __init__(self, fname=None, parameters='HF'): self.init_data() self.get_basis(fname, parameters) - def renormalize(self, a): r"""Compute renormalization factor for Gaussian basis functions. @@ -42,7 +41,6 @@ def renormalize(self, a): x = np.sqrt(np.sqrt(0.5*a/np.pi)) return x*x*x - def read_ac(self, fname): """Read auxiliary basis parameters from file. @@ -70,7 +68,6 @@ def read_ac(self, fname): basis[data.elements.ELEMENTS[q]] = qbasis return basis - def add_caps(self, basis): """Add cap (diffuse) functions to the auxiliary basis. @@ -87,7 +84,6 @@ def add_caps(self, basis): basis[qname].append( [0, [a, self.renormalize(a) ]] ) return - def get_basis(self, fname, parameters): """Initialize auxiliary basis set from file or predefined parameters. @@ -112,7 +108,6 @@ def get_basis(self, fname, parameters): self.acbasis = self._hfs_basis self.parameters = 'HFS' - def use_charge(self, mol): """Adjust basis coefficients based on molecular charge. @@ -132,7 +127,6 @@ def use_charge(self, mol): acbasis[q][-1][1][1] *= factor return acbasis - def use_ecp(self, mol, acbasis): """Adjust basis set to account for effective core potentials (ECP). @@ -176,7 +170,6 @@ def use_ecp(self, mol, acbasis): acbasis[q].pop(i) return acbasis - def get_auxweights(self, auxmol): """Extract auxiliary basis weights from the basis. @@ -198,7 +191,6 @@ def get_auxweights(self, auxmol): iao+=1 return w - def merge_caps(self, w, eri3c): """Contracts 3-center integrals with auxiliary basis weights. @@ -214,7 +206,6 @@ def merge_caps(self, w, eri3c): """ return np.einsum('...i,i->...', eri3c, w) - def get_eri3c(self, mol, auxmol): """Compute 3-center electron repulsion integrals. @@ -231,7 +222,6 @@ def get_eri3c(self, mol, auxmol): eri3c = pmol.intor('int3c2e_sph', shls_slice=shls_slice) return eri3c - def check_coefficients(self, mol, acbasis): """Validate that auxiliary basis coefficients sum to correct total charge. @@ -250,7 +240,6 @@ def check_coefficients(self, mol, acbasis): if not np.isclose(ch1, ch2): raise RuntimeError("Coefficients corrupted after adding ECP") - def HLB20(self, mol): """Compute the LB2020 effective potential matrix. @@ -268,7 +257,6 @@ def HLB20(self, mol): auxw = self.get_auxweights(auxmol) return self.merge_caps(auxw, eri3c) - def Heff(self, mol): """Construct one-electron Hamiltonian for initial guess. @@ -285,7 +273,6 @@ def Heff(self, mol): self.H = self.Hcore + self.HLB20(mol) return self.H - def HLB20_ints_generator(self, mol, auxmol): """Create generator for LB2020 potential gradients. @@ -302,10 +289,11 @@ def HLB20_ints_generator(self, mol, auxmol): """ pmol = mol + auxmol shls_slice = (0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas) - eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) - eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) + eri3c2e_ip1 = pmol.intor('int3c2e_ip1', shls_slice=shls_slice) # (nabla \, \| \) + eri3c2e_ip2 = pmol.intor('int3c2e_ip2', shls_slice=shls_slice) # ( \, \| nabla\) aoslices = mol.aoslice_by_atom()[:,2:] auxaoslices = auxmol.aoslice_by_atom()[:,2:] + def HLB20_ints_deriv(iat): p0, p1 = aoslices[iat] P0, P1 = auxaoslices[iat] @@ -316,7 +304,6 @@ def HLB20_ints_deriv(iat): return -eri3c2e_ip return HLB20_ints_deriv - def HLB20_generator(self, mol): """Create generator for LB2020 potential gradient contributions. @@ -333,11 +320,11 @@ def HLB20_generator(self, mol): auxmol = df.make_auxmol(mol, acbasis) eri3c = self.HLB20_ints_generator(mol, auxmol) auxw = self.get_auxweights(auxmol) + def HLB20_deriv(iat): return self.merge_caps(auxw, eri3c(iat)) return HLB20_deriv - def init_data(self): """Set parameters. diff --git a/qstack/spahm/compute_spahm.py b/qstack/spahm/compute_spahm.py index 4ce2b09d..541803bb 100644 --- a/qstack/spahm/compute_spahm.py +++ b/qstack/spahm/compute_spahm.py @@ -64,10 +64,11 @@ def ext_field_generator(mol, field): """ shls_slice = (0, mol.nbas, 0, mol.nbas) with mol.with_common_orig((0,0,0)): - int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) + int1e_irp = mol.intor('int1e_irp', shls_slice=shls_slice).reshape(3, 3, mol.nao, mol.nao) # ( | rc nabla | ) aoslices = mol.aoslice_by_atom()[:,2:] if field is None: field = (0,0,0) + def field_deriv(iat): p0, p1 = aoslices[iat] dmu_dr = np.zeros_like(int1e_irp) # dim(mu)×dim(r)×nao×nao diff --git a/qstack/spahm/guesses.py b/qstack/spahm/guesses.py index ed6480ec..72ed2400 100644 --- a/qstack/spahm/guesses.py +++ b/qstack/spahm/guesses.py @@ -90,8 +90,8 @@ def SAD(mol, xc): else: fock = hc + vhf[0] if not np.array_equal(vhf[0], vhf[1]): - msg = f'The effective potential ({xc}) returned different alpha and beta matrix components from atomicHF DM' - warnings.warn(msg, RuntimeWarning, stacklevel=2) + msg = f'The effective potential ({xc}) returned different alpha and beta matrix components from atomicHF DM' + warnings.warn(msg, RuntimeWarning, stacklevel=2) return fock @@ -293,6 +293,7 @@ def LB_grad(mf): """ hcore_grad = mf.hcore_generator(mf.mol) HLB_grad = LB20().HLB20_generator(mf.mol) + def H_grad(iat): return hcore_grad(iat) + HLB_grad(iat) return H_grad diff --git a/qstack/spahm/rho/Dmatrix.py b/qstack/spahm/rho/Dmatrix.py index 28fb72a3..7a734715 100644 --- a/qstack/spahm/rho/Dmatrix.py +++ b/qstack/spahm/rho/Dmatrix.py @@ -2,6 +2,7 @@ import numpy as np from numpy import sqrt +from qstack.tools import Cursor def c_split(mol, c): @@ -19,14 +20,11 @@ def c_split(mol, c): coefficients is the subset of c for that angular momentum shell. """ cs = [] - i0 = 0 - for at in mol.aoslice_by_atom(): - for b in range(at[0], at[1]): + slicer = Cursor(inc=lambda l: 2*l+1, action='slicer') + for at0, at1 in mol.aoslice_by_atom()[:,:2]: + for b in range(at0, at1): l = mol.bas_angular(b) - msize = 2*l+1 - for _n in range(mol.bas_nctr(b)): - cs.append([l, c[i0:i0+msize]]) - i0 += msize + cs.extend([[l, c[slicer(l)]] for _n in range(mol.bas_nctr(b))]) return cs @@ -63,7 +61,7 @@ def new_xy_axis(z): i = np.argmin(abs(z)) # find the axis with the minimal projection of the vector z x = -z[i] * z x[i] += 1.0 # create a vector orthogonal to z with dominant component i - x /= np.sqrt(1.0-z[i]*z[i]) # normalize + x /= np.sqrt(1.0-z[i]*z[i]) # normalize y = np.cross(z,x) return np.array([x,y,z]) @@ -115,7 +113,7 @@ def Dmatrix(xyz, lmax, order='xyz'): D[1][l+ 1,l+ -1] = xy D[1][l+ 1,l+ 0] = xz D[1][l+ 1,l+ 1] = xx - elif order=='xyz': # 1 -1 0 + elif order=='xyz': # 1 -1 0 D[1][ 0, 0] = xx D[1][ 0, 1] = xy D[1][ 0, 2] = xz @@ -317,4 +315,3 @@ def Dmatrix_for_z(z, lmax, order='xyz'): list: List of Wigner D-matrices for l=0 to lmax. """ return Dmatrix(new_xy_axis(z), lmax, order) - diff --git a/qstack/spahm/rho/atomic_density.py b/qstack/spahm/rho/atomic_density.py index d9c7db3e..1208ba55 100644 --- a/qstack/spahm/rho/atomic_density.py +++ b/qstack/spahm/rho/atomic_density.py @@ -57,9 +57,10 @@ def fit(mol, dm, aux_basis, short=False, w_slicing=True, only_i=None): a_dfs.append(c_a) if short: - cc = [] - for i, c in zip(auxmol.aoslice_by_atom()[:,2:], a_dfs, strict=True): - cc.append(c[i[0]:i[1]]) - return np.hstack(cc) + if only_i is not None and len(only_i) > 0: + aoslice_by_atom = auxmol.aoslice_by_atom()[only_i,2:] + else: + aoslice_by_atom = auxmol.aoslice_by_atom()[:,2:] + return [c[i0:i1] for (i0, i1), c in zip(aoslice_by_atom, a_dfs, strict=True)] return a_dfs diff --git a/qstack/spahm/rho/bond.py b/qstack/spahm/rho/bond.py index 187df6b3..6e387046 100644 --- a/qstack/spahm/rho/bond.py +++ b/qstack/spahm/rho/bond.py @@ -78,5 +78,6 @@ def main(args=None): else: np.save(args.name_out + mod_suffix, modvec) + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/compute_rho_spahm.py b/qstack/spahm/rho/compute_rho_spahm.py index e8fd1c8e..3a789684 100644 --- a/qstack/spahm/rho/compute_rho_spahm.py +++ b/qstack/spahm/rho/compute_rho_spahm.py @@ -3,7 +3,7 @@ import os import itertools import numpy as np -from qstack.tools import correct_num_threads +from qstack.tools import correct_num_threads, slice_generator from . import utils, dmb_rep_bond as dmbb from . import dmb_rep_atom as dmba from .utils import defaults @@ -47,55 +47,48 @@ def spahm_a_b(rep_type, mols, dms, - max_atoms: Maximum number of atoms/bonds across all molecules - n_features: Representation dimension """ - maxlen = 0 if only_z is None: only_z = [] + if len(only_z) > 0: + print(f"Selecting atom-types in {only_z}") + natm = max(sum(sum(z==np.array(mol.elements)) for z in only_z) for mol in mols) + else: + natm = max(mol.natm for mol in mols) + if rep_type == 'bond': elements, mybasis, qqs0, qqs4q, idx, M = dmbb.read_basis_wrapper(mols, bpath, only_m0, printlevel, elements=elements, cutoff=cutoff, pairfile=pairfile, dump_and_exit=dump_and_exit, same_basis=same_basis) qqs = qqs0 if zeros else qqs4q - maxlen = max([dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements]) + maxlen = max(dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements) elif rep_type == 'atom': if elements is None: elements = set() for mol in mols: elements.update(mol.elements) elements = sorted(set(elements)) - df_wrapper, sym_wrapper = dmba.get_model(model) + df_wrapper, sym_wrapper, maxlen_fn = dmba.get_model(model) ao, ao_len, idx, M = dmba.get_basis_info(elements, auxbasis) - maxlen = sum([len(v) for v in idx.values()]) + maxlen = maxlen_fn(idx, idx.keys() if len(only_z)==0 else only_z) - if len(only_z) > 0: - print(f"Selecting atom-types in {only_z}") - zinmols = [] - for mol in mols: - zinmol = [sum(z == np.array(mol.elements)) for z in only_z] - zinmols.append(sum(zinmol)) - natm = max(zinmols) - else: - natm = max([mol.natm for mol in mols]) - zinmols = [mol.natm for mol in mols] allvec = np.zeros((len(omods), len(mols), natm, maxlen)) for imol, (mol, dm) in enumerate(zip(mols, dms, strict=True)): if printlevel>0: print('mol', imol, flush=True) - if len(only_z) >0: + if len(only_z)>0: only_i = [i for i,z in enumerate(mol.elements) if z in only_z] else: only_i = range(mol.natm) for iomod, omod in enumerate(omods): DM = utils.dm_open_mod(dm, omod) - vec = None # This too !!! (maybe a wrapper or dict) if rep_type == 'bond': vec = dmbb.repr_for_mol(mol, DM, qqs, M, mybasis, idx, maxlen, cutoff, only_z=only_z) elif rep_type == 'atom': c_df = df_wrapper(mol, DM, auxbasis, only_i=only_i) - vec = sym_wrapper(c_df, mol, idx, ao, ao_len, M, elements) + vec = sym_wrapper(maxlen, c_df, mol.elements, idx, ao, ao_len, M, only_i) allvec[iomod,imol,:len(vec)] = vec - return allvec @@ -153,8 +146,7 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= else: all_atoms = [mol.elements for mol in mols] - spin = np.array(spin) ## a bit dirty but couldn't find a better way to ensure Iterable type! - if (spin == None).all(): + if (np.asarray(spin) == None).all(): omods = [None] allvec = spahm_a_b(rep_type, mols, dms, bpath, cutoff, omods, @@ -198,24 +190,21 @@ def get_repr(rep_type, mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm= ], dtype=object) else: - natm_tot = sum(len(elems) for elems in all_atoms) - allvec_new = np.empty_like(allvec, shape=(len(omods), natm_tot, maxlen)) - atm_i = 0 - for mol_i, elems in enumerate(all_atoms): - allvec_new[:, atm_i:atm_i+len(elems), :] = allvec[:, mol_i, :len(elems), :] - atm_i += len(elems) + all_atoms_list = list(itertools.chain.from_iterable(all_atoms)) + allvec_new = np.empty_like(allvec, shape=(len(omods), len(all_atoms_list), maxlen)) + for (mol_i, elems), slice_i in slice_generator([*enumerate(all_atoms)], inc=lambda x: len(x[1])): + allvec_new[:, slice_i, :] = allvec[:, mol_i, :len(elems), :] allvec = allvec_new del allvec_new - all_atoms = list(itertools.chain.from_iterable(all_atoms)) if merge: allvec = np.hstack(allvec) if with_symbols: - allvec = np.array(list(zip(all_atoms, allvec, strict=True)), dtype=object) + allvec = np.array(list(zip(all_atoms_list, allvec, strict=True)), dtype=object) else: if with_symbols: allvec = np.array([ - np.array(list(zip(all_atoms, modvec, strict=True)), dtype=object) + np.array(list(zip(all_atoms_list, modvec, strict=True)), dtype=object) for modvec in allvec ], dtype=object) @@ -294,5 +283,6 @@ def main(args=None): else: np.save(args.name_out + mod_suffix, modvec) + if __name__ == "__main__": main() diff --git a/qstack/spahm/rho/dmb_rep_atom.py b/qstack/spahm/rho/dmb_rep_atom.py index f4f8b6f8..2b8dd316 100644 --- a/qstack/spahm/rho/dmb_rep_atom.py +++ b/qstack/spahm/rho/dmb_rep_atom.py @@ -10,6 +10,7 @@ import pyscf from qstack import compound, fields from . import sym, atomic_density, lowdin +from qstack.tools import slice_generator def get_basis_info(atom_types, auxbasis): @@ -47,18 +48,22 @@ def _make_models_dict(): Defines density fitting functions for each model. Returns: - dict: Mapping model names to (density_fitting_function, symmetrization_function). + dict: Mapping model names to (density_fitting_function, symmetrization_function, maxlen_function). """ - def df_pure(mol, dm, auxbasis): + def df_pure(mol, dm, auxbasis, only_i): """Pure density fitting without modifications.""" - return fields.decomposition.decompose(mol, dm, auxbasis)[1] + auxmol, c = fields.decomposition.decompose(mol, dm, auxbasis) + return sym.c_split_atom(auxmol, c, only_i=only_i) - def df_sad_diff(mol, dm, auxbasis): + def df_sad_diff(mol, dm, auxbasis, only_i=None): """Density fitting on difference from superposition of atomic densities (SAD).""" mf = pyscf.scf.RHF(mol) dm_sad = mf.init_guess_by_atom(mol) + if dm_sad.ndim==3: + dm_sad = dm_sad.sum(axis=0) dm = dm - dm_sad - return fields.decomposition.decompose(mol, dm, auxbasis)[1] + auxmol, c = fields.decomposition.decompose(mol, dm, auxbasis) + return sym.c_split_atom(auxmol, c, only_i=only_i) def df_lowdin_long(mol, dm, auxbasis, only_i=None): """Löwdin partitioning with block-diagonal slicing with contributions from other elements.""" @@ -76,7 +81,7 @@ def df_lowdin_short_x(mol, dm, auxbasis, only_i=None): """Löwdin partitioning.""" return atomic_density.fit(mol, dm, auxbasis, short=True, w_slicing=False, only_i=only_i) - def df_occup(mol, dm, auxbasis): + def df_occup(mol, dm, auxbasis, only_i=None): """Pure density fitting with preserving atom charges.""" L = lowdin.Lowdin_split(mol, dm) diag = np.diag(L.dmL) @@ -85,16 +90,25 @@ def df_occup(mol, dm, auxbasis): eri2c, eri3c = fields.decomposition.get_integrals(mol, auxmol)[1:] c0 = fields.decomposition.get_coeff(dm, eri2c, eri3c) c = fields.decomposition.correct_N_atomic(auxmol, Q, c0, metric=eri2c) - return c - - models_dict = {'pure' : [df_pure, coefficients_symmetrize_short ], - 'sad-diff' : [df_sad_diff, coefficients_symmetrize_short ], - 'occup' : [df_occup, coefficients_symmetrize_short ], - 'lowdin-short' : [df_lowdin_short, coefficients_symmetrize_short ], - 'lowdin-long' : [df_lowdin_long, coefficients_symmetrize_long ], - 'lowdin-short-x': [df_lowdin_short_x, coefficients_symmetrize_short ], - 'lowdin-long-x' : [df_lowdin_long_x, coefficients_symmetrize_long ], - 'mr2021' : [df_pure, coefficients_symmetrize_MR2021]} + return sym.c_split_atom(auxmol, c, only_i=only_i) + + def maxlen_long(idx, _): + return sum(len(v) for v in idx.values()) + + def maxlen_short(idx, elements): + return max(len(idx[q]) for q in elements) + + def maxlen_MR2021(idx, elements): + return max(len(np.unique(idx[q][:,0])) for q in elements) + + models_dict = {'pure' : (df_pure, coefficients_symmetrize_short , maxlen_short ), + 'sad-diff' : (df_sad_diff, coefficients_symmetrize_short , maxlen_short ), + 'occup' : (df_occup, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-short' : (df_lowdin_short, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-long' : (df_lowdin_long, coefficients_symmetrize_long , maxlen_long ), + 'lowdin-short-x': (df_lowdin_short_x, coefficients_symmetrize_short , maxlen_short ), + 'lowdin-long-x' : (df_lowdin_long_x, coefficients_symmetrize_long , maxlen_long ), + 'mr2021' : (df_pure, coefficients_symmetrize_MR2021, maxlen_MR2021 )} return models_dict @@ -113,7 +127,7 @@ def get_model(arg): - 'mr2021': Method from Margraf & Reuter 2021. Returns: - tuple: (density_fitting_function, symmetrization_function) pair. + tuple: (density_fitting_function, symmetrization_function, maxlen_function). - density_fitting_function (callable): Function performing density fitting. Args: @@ -123,20 +137,31 @@ def get_model(arg): only_i (list[int]): List of atom indices to use. Returns: - c (numpy ndarray or list): Density fitting coefficients (1D). + list: Density fitting coefficients per atom (1D numpy ndarrays). - symmetrization_function (callable): Function for symmetrizing coefficients. Args: + maxlen (int): Maximum feature length. c (numpy ndarray): Density fitting coefficients (1D). - mol (pyscf Mole): Molecule object. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. ao_len (dict): Basis set sizes per element. M (dict): Metric matrices per element (2D numpy ndarrays). + only_i (list[int]): List of atom indices to use. Returns: - v (list or numpy ndarray): Symmetrized atomic feature vectors. + numpy ndarray: Symmetrized atomic feature vectors. + + - maxlen_function (callable): Function computing max. feature size. + + Args: + idx (dict): Pair indices per element. + elements (list[str]): Elements for which representation is computed. + + Returns: + int: Maximum feature length. Raises: RuntimeError: If model name is not recognized. @@ -147,7 +172,7 @@ def get_model(arg): return models_dict[arg] -def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): +def coefficients_symmetrize_MR2021(maxlen, c, atoms, idx, ao, _, _M, only_i): """Symmetrize density fitting coefficients using MR2021 method. Reference: @@ -156,81 +181,76 @@ def coefficients_symmetrize_MR2021(c, mol, idx, ao, ao_len, _M, _): Nat. Commun. 12, 344 (2021), doi:10.1038/s41467-020-20471-y Args: - c (numpy ndarray): Concatenated density fitting coefficients. - mol (pyscf Mole): pyscf Mole object. + maxlen (int): Maximum feature length. + c (list): List of coefficient arrays per atom. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. - ao_len (dict): Basis set sizes per element. - _M: Unused (for interface compatibility). _: Unused (for interface compatibility). + _M: Unused (for interface compatibility). + only_i (list[int]): List of atom indices to use. Returns: - list: Symmetrized vectors for each atom. + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. """ - v = [] - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v.append(sym.vectorize_c_MR2021(idx[q], ao[q], c[i0:i0+n])) - i0 += n + if only_i is not None and len(only_i)>0: + atoms = np.array(atoms)[only_i] + v = np.zeros((len(atoms), maxlen)) + for iat, (q, ci) in enumerate(zip(atoms, c, strict=True)): + vi = sym.vectorize_c_MR2021(idx[q], ao[q], ci) + v[iat,:len(vi)] = vi return v -def coefficients_symmetrize_short(c, mol, idx, ao, ao_len, M, _): +def coefficients_symmetrize_short(maxlen, c, atoms, idx, ao, _, M, only_i): """Symmetrize coefficients for each atom. For each atom, use contributions from the said atom. Args: - c (numpy ndarray): Density fitting coefficients. - mol (pyscf Mole): pyscf Mole object. + maxlen (int): Maximum feature length. + c (list): List of coefficient arrays per atom. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. - ao_len (dict): Basis set sizes per element. - M (dict): Metric matrices per element. _: Unused (for interface compatibility). + M (dict): Metric matrices per element. + only_i (list[int]): List of atom indices to use. Returns: numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. """ - v = [] - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v.append(M[q] @ sym.vectorize_c_short(idx[q], ao[q], c[i0:i0+n])) - i0 += n - maxlen = sum([len(v) for v in idx.values()]) - v = np.array([np.pad(x, (0, maxlen-len(x)), constant_values=0) for x in v]) + if only_i is not None and len(only_i)>0: + atoms = np.array(atoms)[only_i] + v = np.zeros((len(atoms), maxlen)) + for iat, (q, ci) in enumerate(zip(atoms, c, strict=True)): + v[iat,:len(idx[q])] = M[q] @ sym.vectorize_c_short(idx[q], ao[q], ci) return v -def coefficients_symmetrize_long(c_df, mol, idx, ao, ao_len, M, atom_types): - """Symmetrizes coefficients for long Löwdin models. +def coefficients_symmetrize_long(maxlen, c_df, atoms, idx, ao, ao_len, M, _): + """Symmetrize coefficients for long Löwdin models. For each atom, use contributions from the said atom as well as all other atoms. Args: + maxlen (int): Maximum feature length. c_df (list): List of coefficient arrays per atom. - mol (pyscf Mole): pyscf Mole object. + atoms (list[str]): Atoms in molecule (from pyscf Mole.elements). idx (dict): Pair indices per element. ao (dict): Angular momentum info per element. ao_len (dict): Basis set sizes per element. M (dict): Metric matrices per element. - atom_types (list): All element types in dataset. + _: Unused (for interface compatibility). Returns: - list: Symmetrized vectors (numpy ndarrays) for each atom. + numpy ndarray: 2D array (n_atoms, max_features) with zero-padding. """ - vectors = [] - for c_a in c_df: - v_atom = {q: np.zeros(len(idx[q])) for q in atom_types} - i0 = 0 - for q in mol.elements: - n = ao_len[q] - v_atom[q] += M[q] @ sym.vectorize_c_short(idx[q], ao[q], c_a[i0:i0+n]) - i0 += n - v_a = np.hstack([v_atom[q] for q in atom_types]) - vectors.append(v_a) + vectors = np.zeros((len(c_df), maxlen)) + feature_slice = dict(slice_generator(idx.keys(), inc=lambda q: len(idx[q]))) + for iat, c_a in enumerate(c_df): + for q, ao_slice in slice_generator(atoms, inc=lambda q: ao_len[q]): + vectors[iat,feature_slice[q]] += M[q] @ sym.vectorize_c_short(idx[q], ao[q], c_a[ao_slice]) return vectors diff --git a/qstack/spahm/rho/dmb_rep_bond.py b/qstack/spahm/rho/dmb_rep_bond.py index a47e66e6..4f26b635 100644 --- a/qstack/spahm/rho/dmb_rep_bond.py +++ b/qstack/spahm/rho/dmb_rep_bond.py @@ -327,7 +327,7 @@ def repr_for_bond(i0, i1, L, mybasis, idx, q, r, cutoff): dm1 = L.get_bond(i0, i1) bname = make_bname(q0, q1) cs = fit_dm(dm1, L.mol, mybasis[bname], r0, r1) - lmax = max([c[0] for c in cs]) + lmax = max(c[0] for c in cs) v0 = vec_from_cs(+z, cs, lmax, idx[bname]) v1 = vec_from_cs(-z, cs, lmax, idx[bname]) return [v0, v1], bname diff --git a/qstack/spahm/rho/sym.py b/qstack/spahm/rho/sym.py index a3fcee5f..c18188a9 100644 --- a/qstack/spahm/rho/sym.py +++ b/qstack/spahm/rho/sym.py @@ -6,6 +6,27 @@ from qstack.reorder import get_mrange +def c_split_atom(mol, c, only_i=None): + """Split coefficient vector by angular momentum quantum number for each atom. + + Organizes expansion coefficients into sublists grouped by angular momentum (l) + for each atomic basis function. + + Args: + mol (pyscf Mole): pyscf Mole object. + c (numpy ndarray): 1D array of expansion coefficients. + only_i (list[int]): List of atom indices to use. + + Returns: + list: List of coefficients (numpy ndarrays) per atom. + """ + if only_i is None or len(only_i)==0: + aoslice_by_atom = mol.aoslice_by_atom()[:,2:] + else: + aoslice_by_atom = mol.aoslice_by_atom()[only_i,2:] + return [c[i0:i1] for i0, i1 in aoslice_by_atom] + + def idxl0(ao_i, l, ao): """Return index of basis function with same L and N quantum numbers but M=0. @@ -62,7 +83,7 @@ def store_pair_indices(ao): ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. Returns: - list: List of [i, j] index pairs with matching (l, m). + numpy ndarray: [i, j] index pairs with matching (l, m). """ idx = [] for i, [li, mi] in enumerate(zip(ao['l'], ao['m'], strict=True)): @@ -70,7 +91,7 @@ def store_pair_indices(ao): if (li!=lj) or (mi!=mj): continue idx.append([i, j]) - return idx + return np.array(idx) def store_pair_indices_short(ao, ao_start): @@ -84,7 +105,7 @@ def store_pair_indices_short(ao, ao_start): ao_start (list): Starting indices for each angular momentum shell. Returns: - list: List of [i, j] index pairs for m=0 components with matching L. + numpy ndarray: [i, j] index pairs for m=0 components with matching L. """ idx = [] for i in ao_start: @@ -94,7 +115,7 @@ def store_pair_indices_short(ao, ao_start): if li!=lj: continue idx.append([i, j]) - return idx + return np.array(idx) def metric_matrix(q, idx, ao, S): @@ -106,7 +127,7 @@ def metric_matrix(q, idx, ao, S): Args: q (str): Element symbol key for angular momentum info. - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info dict with nested structure ao[q]. S (numpy ndarray): Overlap matrix. @@ -121,7 +142,7 @@ def metric_matrix(q, idx, ao, S): i1, j1 = idx[p1] l = ao['l'][i] l1 = ao['l'][i1] - if(l!=l1): + if l!=l1: continue A[p1,p] = A[p,p1] = 1.0/(2*l+1) \ * S[idxl0(i, l, ao[q]), idxl0(i1, l, ao[q])] \ @@ -133,7 +154,7 @@ def metric_matrix_short(idx, ao, S): """Compute metric matrix for symmetrization of short-format coefficients. Args: - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. S (numpy ndarray): Overlap matrix. @@ -148,7 +169,7 @@ def metric_matrix_short(idx, ao, S): i1,j1 = idx[p1] l = ao['l'][i] l1 = ao['l'][i1] - if(l!=l1): + if l!=l1: continue A[p1,p] = A[p,p1] = S[i,i1] * S[j,j1] / (2*l+1) return sqrtm(A) @@ -160,7 +181,7 @@ def vectorize_c(idx, c): Creates rotationally invariant representation from coefficient products. Args: - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. c (numpy ndarray): 1D array of coefficients. Returns: @@ -184,14 +205,14 @@ def vectorize_c_MR2021(idx_pair, ao, c): within each angular momentum shell. Args: - idx_pair (list): List of [i, j] basis function pair indices. + idx_pair (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. c (numpy ndarray): 1D array of density fitting coefficients. Returns: numpy ndarray: 1D array of contracted coefficient norms per shell. """ - idx = sorted(set(np.array(idx_pair)[:,0])) + idx = np.unique(idx_pair[:,0]) v = np.zeros(len(idx)) for p,i in enumerate(idx): l = ao['l'][i] @@ -206,7 +227,7 @@ def vectorize_c_short(idx, ao, c): Computes representation by contracting coefficient vectors of angular momentum shells. Args: - idx (list): List of [i, j] basis function pair indices (shell starts). + idx (numpy ndarray): [i, j] basis function pair indices (shell starts). ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. c (numpy ndarray): 1D array of density fitting coefficients. @@ -231,7 +252,7 @@ def store_pair_indices_z(ao): ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. Returns: - list: List of [i, j] index pairs with |m_i| = |m_j|. + numpy ndarray: [i, j] index pairs with |m_i| = |m_j|. """ idx = [] for i, mi in enumerate(ao['m']): @@ -239,7 +260,7 @@ def store_pair_indices_z(ao): if abs(mi)!=abs(mj): continue idx.append([i,j]) - return idx + return np.array(idx) def store_pair_indices_z_only0(ao): @@ -251,7 +272,7 @@ def store_pair_indices_z_only0(ao): ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. Returns: - list: List of [i, j] index pairs where both m_i = m_j = 0. + numpy ndarray: [i, j] index pairs where both m_i = m_j = 0. """ idx = [] for i, mi in enumerate(ao['m']): @@ -261,7 +282,7 @@ def store_pair_indices_z_only0(ao): if mj!=0: continue idx.append([i,j]) - return idx + return np.array(idx) def metric_matrix_z(idx, ao, S): @@ -272,7 +293,7 @@ def metric_matrix_z(idx, ao, S): numbers satisfy m_i=m_j AND m_i1=m_j1, or m_i=-m_j AND m_i1=-m_j1. Args: - idx (list): List of [i, j] basis function pair indices. + idx (numpy ndarray): [i, j] basis function pair indices. ao (dict): Angular momentum info with 'l' and 'm' lists for each AO. S (numpy ndarray): Overlap matrix. diff --git a/qstack/spahm/rho/utils.py b/qstack/spahm/rho/utils.py index b47a4d64..b0e7c9bb 100644 --- a/qstack/spahm/rho/utils.py +++ b/qstack/spahm/rho/utils.py @@ -53,7 +53,7 @@ def chsp_converter(chsp): return np.full(n, None, dtype=object) if os.path.isfile(fname): chsp = np.loadtxt(fname, dtype=object, converters=chsp_converter, encoding=None) - if(len(chsp)!=n): + if len(chsp)!=n: raise RuntimeError(f'Wrong length of the file {fname}') else: raise RuntimeError(f"{fname} can not be found") @@ -194,7 +194,7 @@ def check_data_struct(fin, local=False): is_labeled = False if not local and x.ndim == 1: is_single = True - elif x.shape[1] != 2: ## could be problematic! (if it's a set of local representations and nfeatures = 2 !! + elif x.shape[1] != 2: # could be problematic! (if it's a set of local representations and nfeatures = 2 !! is_single=True else: is_single = False @@ -266,10 +266,10 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False, else: reps.extend(x) else: - if is_labeled: + if is_labeled: reps.append(x[1]) labels.extend(x[0]) - else: + else: reps.append(x) try: reps = np.array(reps, dtype=float) diff --git a/qstack/tools.py b/qstack/tools.py index 9b8f58a1..11e30bd9 100644 --- a/qstack/tools.py +++ b/qstack/tools.py @@ -7,6 +7,8 @@ import time import resource import argparse +import itertools +import numpy as np def unix_time_decorator(func): @@ -98,3 +100,97 @@ def remove_argument(self, arg): if (opts and opts[0] == arg) or group_action.dest == arg: action._group_actions.remove(group_action) return + + +def slice_generator(iterable, inc=lambda x: x, i0=0): + """Generate slices for elements in an iterable based on increments. + + Args: + iterable (iterable): Iterable of elements to generate slices for. + inc (callable: int->int): Function that computes increment size for each element. + Defaults to identity function. + i0 (int): Initial starting index. Defaults to 0. + + Yields: + tuple: (element, slice) pairs for each element in the iterable. + """ + func = func=lambda total, elem: total+inc(elem) + starts = itertools.accumulate(iterable, func=func, initial=i0) + starts_ends = itertools.pairwise(starts) + for elem, (start, end) in zip(iterable, starts_ends, strict=True): + yield elem, np.s_[start:end] + + +class Cursor: + """Cursor class to manage dynamic indexing. + + Args: + action (str): Type of indexing action ('slicer' or 'ranger'). + inc (callable: int->int): Function to determine increment size. + Defaults to identity function. + i0 (int): Initial index position. Defaults to 0. + + Attributes: + i (int): Current index position. + i_prev (int): Previous index position. + cur (range or slice: Current range or slice. + inc (callable int->int): Increment function. + + Methods: + add(di): Advances the cursor by increment and returns current range/slice. + __call__(di=None): Advances the cursor if di is not None, + returns current range/slice. + """ + def __init__(self, action='slicer', inc=lambda x: x, i0=0): + self.i = i0 + self.i_prev = None + self.inc = inc + self.cur = None + self.action = action + self.actions_dict = {'slicer': self._slicer, 'ranger': self._ranger} + + def add(self, di): + """Advance the cursor and return the current range or slice. + + Args: + di: Element to determine increment size. + + Returns: + Current range or slice after advancing. + """ + self._add(di) + self.cur = self.actions_dict[self.action]() + return self.cur + + def _ranger(self): + return range(self.i_prev, self.i) + + def _slicer(self): + return np.s_[self.i_prev:self.i] + + def _add(self, di): + self.i_prev = self.i + self.i += self.inc(di) + + def __call__(self, di=None): + """Optionally advance the cursor and return the current range or slice. + + If the argument is passed, it is used to advance the cursor. + If not, the current value is returned. + + Args: + di (optional): Element to determine increment size. + + Returns: + Current range or slice (after advancing). + """ + if di is None: + return self.cur + else: + return self.add(di) + + def __str__(self): + return str(self.i) + + def __repr__(self): + return str(self.i) diff --git a/ruff.toml b/ruff.toml index 45d1474c..04a1cd58 100644 --- a/ruff.toml +++ b/ruff.toml @@ -68,44 +68,19 @@ ignore = [ ] #preview = true + extend-select = ["DOC"] extend-ignore = [ - "E265", # 1 - "FURB103", # 1 - "FURB152", # 1 - "E266", # 2 - "E271", # 2 - "E305", # 2 - "FURB101", # 2 - "E262", # 3 - "E306", # 5 - "E228", # 6 - "E272", # 6 - "W391", # 6 - "RUF031", # 7 - "E261", # 12 - "E275", # 13 - "E251", # 18 - "E211", # 19 - "E303", # 24 - "E201", # 33 - "E202", # 34 - "E203", # 51 - "E221", # 113 - "E225", # 179 - "E241", # 268 - "E222", # 278 - "E231", # 608 - "E226", # 4313 - "FURB118", - "E117", - "E111", - "C419", +# pathlib + "FURB101", "FURB103", +# whitespaces + "E201", "E202", "E203", "E211", + "E221", "E222", "E225", "E226", "E228", + "E231", "E241", "E271", "E272", +# lambda + "FURB118", ] -#[lint.pydocstyle] -#convention = "google" - [lint.per-file-ignores] "qstack/spahm/rho/Dmatrix.py" = ["E702"] # multiple statements on one line (semicolon) "qstack/spahm/rho/bond.py" = ["E711"] # comparison to `None` for np.array elements @@ -120,6 +95,7 @@ extend-ignore = [ "qstack/mathutils/xyz_integrals_float.py" = ["D417"] # missing argument descriptions "qstack/reorder.py" = ["DOC502"] # raised exception is not explicitly raised "qstack/orcaio.py" = ["DOC502"] # raised exception is not explicitly raised +"qstack/equio.py" = ["E251"] # unexpected spaces around keyword / parameter equals "qstack/spahm/rho/dmb_rep_atom.py" = [ "DOC201", # `return` is not documented in docstring "DOC102", # documented parameter is not in the function's signature diff --git a/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy b/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy index 311909ec..75a20eb4 100644 Binary files a/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy and b/tests/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy differ diff --git a/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy b/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy new file mode 100644 index 00000000..0360a409 Binary files /dev/null and b/tests/data/SPAHM_a_H2O/X_H2O_MR2021.npy differ diff --git a/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy b/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy index 39b132de..65ec869a 100644 Binary files a/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy and b/tests/data/SPAHM_a_H2O/X_H2O_SAD.npy differ diff --git a/tests/test_c2mio.py b/tests/test_c2mio.py index f0100b21..93d41277 100755 --- a/tests/test_c2mio.py +++ b/tests/test_c2mio.py @@ -3,15 +3,16 @@ import os from qstack.c2mio import get_cell, get_mol, get_ligand + def test_c2mio(): path = os.path.dirname(os.path.realpath(__file__)) - cell = get_cell(f'{path}/data/cell2mol/YOXKUS.cif', workdir=f'{path}/data/cell2mol/') #cell = get_cell('Cell_yoxkus.cell', workdir='.') - #print(cell.moleclist) + cell = get_cell(f'{path}/data/cell2mol/YOXKUS.cif', workdir=f'{path}/data/cell2mol/') # cell = get_cell('Cell_yoxkus.cell', workdir='.') + # print(cell.moleclist) mol = get_mol(cell, mol_idx=0, ecp='def2-svp') assert mol.natm==52 cell = get_cell(f'{path}/data/cell2mol/Cell_YOXKUS.cell', workdir='.') - #print(cell.moleclist[0].ligands) + # print(cell.moleclist[0].ligands) mol_lig = get_ligand(cell, mol_idx=0, lig_idx=1) assert mol_lig.natm==47 diff --git a/tests/test_dori.py b/tests/test_dori.py index 7350721b..4769cb46 100755 --- a/tests/test_dori.py +++ b/tests/test_dori.py @@ -85,6 +85,7 @@ def test_dori_df(): dori2, _, _, _, _ = dori(mol, c=c, grid_type='cube', resolution=0.5, alg='num') assert np.allclose(dori0, dori2) + if __name__ == '__main__': test_derivatives() test_dori_deriv() diff --git a/tests/test_equio.py b/tests/test_equio.py index 0738634a..2845eba1 100755 --- a/tests/test_equio.py +++ b/tests/test_equio.py @@ -3,6 +3,7 @@ import os import tempfile import filecmp +from itertools import starmap import numpy as np from qstack import compound, equio import metatensor @@ -27,9 +28,10 @@ def test_equio_vector(): ctensor = equio.array_to_tensormap(mol, c) tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, ctensor) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.mts', tmpfile)) c1 = equio.tensormap_to_array(mol, ctensor) - assert(np.linalg.norm(c-c1)==0) + assert (np.linalg.norm(c-c1)==0) + def test_equio_matrix(): path = os.path.dirname(os.path.realpath(__file__)) @@ -38,9 +40,10 @@ def test_equio_matrix(): dtensor = equio.array_to_tensormap(mol, dm) tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, dtensor) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.mts', tmpfile)) dm1 = equio.tensormap_to_array(mol, dtensor) - assert(np.linalg.norm(dm-dm1)==0) + assert (np.linalg.norm(dm-dm1)==0) + def test_equio_joinsplit(): path = os.path.dirname(os.path.realpath(__file__)) @@ -54,12 +57,12 @@ def test_equio_joinsplit(): tmpfile = tempfile.mktemp() + MTS_EXT metatensor.save(tmpfile, ctensor_big) - assert(filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.mts', tmpfile)) ctensors = equio.split(ctensor_big) - c11, c22 = [equio.tensormap_to_array(mol, t) for mol,t in zip([mol1,mol2], ctensors, strict=True)] - assert(np.linalg.norm(c11-c1)==0) - assert(np.linalg.norm(c22-c2)==0) + c11, c22 = [*starmap(equio.tensormap_to_array, zip([mol1, mol2], ctensors, strict=True))] + assert (np.linalg.norm(c11-c1)==0) + assert (np.linalg.norm(c22-c2)==0) if __name__ == '__main__': diff --git a/tests/test_excited.py b/tests/test_excited.py index d15988a9..908a33b0 100755 --- a/tests/test_excited.py +++ b/tests/test_excited.py @@ -23,22 +23,22 @@ def test_excited(): x_ao = fields.excited.get_transition_dm(mol, X[state_id], coeff) dip = fields.moments.first(mol, x_ao) dip0 = np.array([ 0.68927353, -2.10714637, -1.53423419]) - assert(np.allclose(dip, dip0, atol=1e-8)) + assert (np.allclose(dip, dip0, atol=1e-8)) hole_d, part_d = fields.excited.get_holepart(mol, X[state_id], coeff) - assert(np.allclose(hole_d, hole_d0, atol=1e-8)) - assert(np.allclose(part_d, part_d0, atol=1e-8)) + assert (np.allclose(hole_d, hole_d0, atol=1e-8)) + assert (np.allclose(part_d, part_d0, atol=1e-8)) auxmol = compound.make_auxmol(mol, 'ccpvqz jkfit') dip = fields.moments.first(auxmol, x_c) dip0 = np.array([-0.68919144, 2.10692116, 1.53399871]) - assert(np.allclose(dip, dip0, atol=1e-8)) + assert (np.allclose(dip, dip0, atol=1e-8)) dist, hole_extent, part_extent = fields.excited.exciton_properties(mol, hole_d, part_d) - assert(np.allclose([dist, hole_extent, part_extent], [2.59863354, 7.84850017, 5.67617426], atol=1e-7)) + assert (np.allclose([dist, hole_extent, part_extent], [2.59863354, 7.84850017, 5.67617426], atol=1e-7)) dist, hole_extent, part_extent = fields.excited.exciton_properties(auxmol, hole_c, part_c) - assert(np.allclose([dist, hole_extent, part_extent], [2.59940378, 7.8477511, 5.67541635], atol=1e-7)) + assert (np.allclose([dist, hole_extent, part_extent], [2.59940378, 7.8477511, 5.67541635], atol=1e-7)) def test_excited_frag(): @@ -56,8 +56,8 @@ def test_excited_frag(): else: omega_hole_frag0 = np.array([ 4.24698889, 25.1717958 , 7.80455406, 32.89098877, 29.88567248]) omega_part_frag0 = np.array([ 1.87258999, 19.98184387, 37.30712212, 36.77858748, 4.05985653]) - assert(np.linalg.norm(omega_hole_frag-omega_hole_frag0)<1e-8) - assert(np.linalg.norm(omega_part_frag-omega_part_frag0)<1e-8) + assert (np.linalg.norm(omega_hole_frag-omega_hole_frag0)<1e-8) + assert (np.linalg.norm(omega_part_frag-omega_part_frag0)<1e-8) if __name__ == '__main__': diff --git a/tests/test_fitting.py b/tests/test_fitting.py index f7bc4bc5..59a7f573 100755 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -12,7 +12,7 @@ def test_fitting(): dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') c0 = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') _auxmol, c = decomposition.decompose(mol, dm, 'cc-pvdz jkfit') - assert(np.linalg.norm(c-c0)<1e-10) + assert (np.linalg.norm(c-c0)<1e-10) def test_block_fitting(): @@ -29,7 +29,7 @@ def test_block_fitting(): c0 = decomposition.get_coeff(dm, eri2c0, eri3c) c = decomposition.get_coeff(dm, eri2c0, eri3c, slices=atom_bounds) - assert(np.linalg.norm(c-c0)<1e-10) + assert (np.linalg.norm(c-c0)<1e-10) def test_fitting_error(): @@ -42,9 +42,9 @@ def test_fitting_error(): _, eri2c, eri3c = decomposition.get_integrals(mol, auxmol) self_repulsion = decomposition.get_self_repulsion(mol, dm) error = decomposition.optimal_decomposition_error(self_repulsion, c0, eri2c) - assert(np.allclose(error, error0)) + assert (np.allclose(error, error0)) error = decomposition.decomposition_error(self_repulsion, c0, eri2c, eri3c, dm) - assert(np.allclose(error, error0)) + assert (np.allclose(error, error0)) def test_fitting_noe(): diff --git a/tests/test_global.py b/tests/test_global.py index 73d641eb..ef443021 100755 --- a/tests/test_global.py +++ b/tests/test_global.py @@ -12,13 +12,12 @@ def test_avg_kernel(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, akernel='L', gkernel='avg', sigma=1.0) - true_K = np.array( [[1. , 1. , 0.79179528], \ - [1. , 1. , 0.79179528] , \ + true_K = np.array( [[1. , 1. , 0.79179528], + [1. , 1. , 0.79179528] , [0.79179528, 0.79179528, 1. ]]) - - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) def test_rem_kernel(): @@ -28,12 +27,12 @@ def test_rem_kernel(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, akernel='L', gkernel='rem', sigma=1.0, gdict={'alpha':1.0, 'normalize':1, 'verbose':0}) - true_K = np.array( [[1. , 0.6528238, 1. ], \ - [0.6528238,1. ,0.6528238], \ + true_K = np.array( [[1. , 0.6528238, 1. ], + [0.6528238,1. ,0.6528238], [1. ,0.6528238 ,1. ]]) - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) def test_rem_kernel_not_self(): @@ -43,12 +42,12 @@ def test_rem_kernel_not_self(): mols = [np.load(f, allow_pickle=True) for f in mollist] K = kernel.kernel(mols, Y=np.copy(mols), akernel='L', gkernel='rem', sigma=1.0, gdict={'alpha':1.0, 'normalize':1, 'verbose':0}) - true_K = np.array( [[1. , 0.6528238, 1. ], \ - [0.6528238,1. ,0.6528238], \ + true_K = np.array( [[1. , 0.6528238, 1. ], + [0.6528238,1. ,0.6528238], [1. ,0.6528238 ,1. ]]) - assert(K.shape == (3,3)) - assert(np.abs(np.sum(K-true_K)) < 1e-05) + assert (K.shape == (3,3)) + assert (np.abs(np.sum(K-true_K)) < 1e-05) if __name__ == '__main__': diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 397fe974..cec3cfe2 100755 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -5,19 +5,23 @@ def test_local_kernels(): - #np.random.seed(666) - #X = np.random.rand(2,4) - #Y = np.random.rand(2,4) - #K_G_good = np.zeros((len(X),len(Y))) - #K_L_good = np.zeros((len(X),len(Y))) - #for i, x in enumerate(X): - # for j, y in enumerate(Y): - # K_G_good[i,j] = np.dot(x-y, x-y) - # K_L_good[i,j] = np.sum(abs(x-y)) - #np.exp(-K_G_good/2, out=K_G_good) - #np.exp(-K_L_good/2, out=K_L_good) - #K_dot_good = np.dot(X, Y.T) - #K_cos_good = K_dot_good / np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(Y, axis=1)) + """Test data is generated using: + ``` + np.random.seed(666) + X = np.random.rand(2,4) + Y = np.random.rand(2,4) + K_G_good = np.zeros((len(X),len(Y))) + K_L_good = np.zeros((len(X),len(Y))) + for i, x in enumerate(X): + for j, y in enumerate(Y): + K_G_good[i,j] = np.dot(x-y, x-y) + K_L_good[i,j] = np.sum(abs(x-y)) + np.exp(-K_G_good/2, out=K_G_good) + np.exp(-K_L_good/2, out=K_L_good) + K_dot_good = np.dot(X, Y.T) + K_cos_good = K_dot_good / np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(Y, axis=1)) + ``` + """ X = np.array([[0.70043712, 0.84418664, 0.67651434, 0.72785806], [0.95145796, 0.0127032 , 0.4135877 , 0.04881279]]) Y = np.array([[0.09992856, 0.50806631, 0.20024754, 0.74415417], [0.192892 , 0.70084475, 0.29322811, 0.77447945]]) diff --git a/tests/test_molden.py b/tests/test_molden.py index 5ba58c7c..8ea95543 100755 --- a/tests/test_molden.py +++ b/tests/test_molden.py @@ -14,7 +14,7 @@ def test_molden(): c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') tmpfile = tempfile.mktemp() + '.molden' coeffs_to_molden(auxmol, c, tmpfile) - assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.molden', tmpfile)) + assert (filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.molden', tmpfile)) if __name__ == '__main__': diff --git a/tests/test_moments.py b/tests/test_moments.py index e72f3db1..c95c557d 100755 --- a/tests/test_moments.py +++ b/tests/test_moments.py @@ -18,25 +18,25 @@ def test_moments(): R2 = 12.352661975356678 r0, r1, r2 = moments.r2_c(mol, c) - assert(np.allclose(r0, R0)) - assert(np.allclose(r1, R1)) - assert(np.allclose(r2, R2)) + assert (np.allclose(r0, R0)) + assert (np.allclose(r1, R1)) + assert (np.allclose(r2, R2)) I0, I1, I2 = moments.r2_c(mol, None) - assert(np.allclose(r0, I0@c)) - assert(np.allclose(r1, I1@c)) - assert(np.allclose(r2, I2@c)) + assert (np.allclose(r0, I0@c)) + assert (np.allclose(r1, I1@c)) + assert (np.allclose(r2, I2@c)) I0, I1, I2 = moments.r2_c(mol, None, per_atom=True) r0_atom = c @ I0 - assert(np.allclose(r0_atom, R0_atom)) + assert (np.allclose(r0_atom, R0_atom)) r1_atom = np.einsum('p,xpa->ax', c, I1) # (atom, component) - assert(np.allclose(r1_atom.sum(axis=0), R1)) + assert (np.allclose(r1_atom.sum(axis=0), R1)) r0_atom, r1_atom, r2_atom = moments.r2_c(mol, c, per_atom=True) - assert(np.allclose(r0_atom, R0_atom)) - assert(np.allclose(r1_atom.sum(axis=0), R1)) - assert(np.allclose(r2_atom.sum(), R2)) + assert (np.allclose(r0_atom, R0_atom)) + assert (np.allclose(r1_atom.sum(axis=0), R1)) + assert (np.allclose(r2_atom.sum(), R2)) if __name__ == '__main__': diff --git a/tests/test_opt.py b/tests/test_opt.py index e861e700..f5f216bb 100755 --- a/tests/test_opt.py +++ b/tests/test_opt.py @@ -11,15 +11,16 @@ def test_hf_otpd(): mol = compound.xyz_to_mol(path+'/data/H2O.xyz', 'def2svp', charge=0, spin=0) dm = fields.dm.get_converged_dm(mol, xc="pbe") - otpd, grid = fields.hf_otpd.hf_otpd(mol, dm, return_all = True) + otpd, grid = fields.hf_otpd.hf_otpd(mol, dm, return_all=True) mol_dict = {'atom': mol.atom, 'rho': otpd, 'coords': grid.coords, 'weights': grid.weights} g = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt', path+'/data/initial/O_N0.txt'], [mol_dict], check=True, printlvl=0) - assert(np.all(g['diff'] < 1e-6)) + assert (np.all(g['diff'] < 1e-6)) ob_good = {'H': [[0, [42.30256758622713, 1]], [0, [6.83662718701579, 1]], [0, [1.8547192742478775, 1]], [0, [0.3797283290452742, 1]], [1, [12.961663119622536, 1]], [1, [2.507400755551906, 1]], [1, [0.6648804678758861, 1]], [2, [3.482167705165484, 1]], [2, [0.6053728887614225, 1]], [3, [0.6284190712545101, 1]]]} ob = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt'], [path+'/data/H2.ccpvtz.grid3.npz'], printlvl=2, gtol_in=1e-5) for [_l,[a,_c]], [_l1,[a1,_c1]] in zip(ob_good['H'], ob['H'], strict=True): - assert(abs(a-a1)<1e-5) + assert (abs(a-a1)<1e-5) + if __name__ == '__main__': test_hf_otpd() diff --git a/tests/test_orca.py b/tests/test_orca.py index 7fbbbc36..99013cd9 100755 --- a/tests/test_orca.py +++ b/tests/test_orca.py @@ -32,9 +32,9 @@ def test_orca_density_reader(): dm421 = qstack.orcaio.read_density(mol, 'H2O.orca421', directory=path+'/data/orca/', version=421, openshell=True) dm504 = qstack.orcaio.read_density(mol, 'H2O.orca504', directory=path+'/data/orca/', version=504, openshell=True) - assert(np.linalg.norm(dm-dm400)<1e-4) - assert(np.linalg.norm(dm400-dm421)<1e-10) - assert(np.linalg.norm(dm504-dm421)<5e-3) + assert (np.linalg.norm(dm-dm400)<1e-4) + assert (np.linalg.norm(dm400-dm421)<1e-10) + assert (np.linalg.norm(dm504-dm421)<5e-3) def test_orca_gbw_reader(): @@ -45,6 +45,7 @@ def test_orca_gbw_reader(): c = mf.mo_coeff e = mf.mo_energy occ = mf.mo_occ + def compare_MO(c0, c1): for s in range(c0.shape[0]): for i in range(c0.shape[-1]): diff --git a/tests/test_regression.py b/tests/test_regression.py index bfb8b989..9702fa55 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -23,7 +23,7 @@ def test_hyperparameters(): [5.18262767e-01,3.00473746e-01,1.00000000e-10,3.16227766e+01], [5.10592542e-01,3.38247735e-01,1.00000000e+00,3.16227766e+01]] - assert(np.allclose(hyper, true_hyper)) + assert (np.allclose(hyper, true_hyper)) def test_regression(): @@ -38,7 +38,7 @@ def test_regression(): (6, 0.24018169400891018, 0.08584295185009833), (8, 0.2708852104417901, 7.021666937153402e-17)] - assert(np.allclose(lc, true_lc)) + assert (np.allclose(lc, true_lc)) def test_regression_sparse(): @@ -52,7 +52,7 @@ def test_regression_sparse(): (4, 0.4803773474666784, 0.19356070353924582), (6, 0.333707374435793, 0.13803898307368923), (8, 0.4501685644789055, 8.95090418262362e-17)] - assert(np.allclose(lc, true_lc)) + assert (np.allclose(lc, true_lc)) def test_regression_idx(): @@ -106,6 +106,7 @@ def test_oos(): pred3 = oos.oos(X, X[idx_train], weights, sigma=3.162278e+01, random_state=666) assert np.allclose(pred3, y[idx_train]) + def test_cross_validate_results(): path = os.path.dirname(os.path.realpath(__file__)) X = np.load(os.path.join(path, 'data/mols/X_lb.npy')) @@ -116,8 +117,7 @@ def test_cross_validate_results(): (4, 0.7336549 , 0.59839317), (6, 0.7288867 , 0.50714861), (8, 0.72604955, 0.48307486)] - assert(np.allclose(lc, true_lc)) - + assert (np.allclose(lc, true_lc)) if __name__ == '__main__': diff --git a/tests/test_reorder.py b/tests/test_reorder.py index 501f2dd2..1076814c 100755 --- a/tests/test_reorder.py +++ b/tests/test_reorder.py @@ -13,13 +13,13 @@ def test_reorder_pyscf_gpr(): dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy') dm1 = reorder.reorder_ao(mol, dm, src='pyscf', dest='gpr') dm2 = reorder.reorder_ao(mol, dm1, src='gpr', dest='pyscf') - assert(np.linalg.norm(dm-dm2)==0) + assert (np.linalg.norm(dm-dm2)==0) auxmol = compound.make_auxmol(mol, 'cc-pvdz jkfit') c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy') c1 = reorder.reorder_ao(auxmol, c, src='pyscf', dest='gpr') c2 = reorder.reorder_ao(auxmol, c1, src='gpr', dest='pyscf') - assert(np.linalg.norm(c-c2)==0) + assert (np.linalg.norm(c-c2)==0) def test_reorder_pyscf_gpr_orca(): @@ -30,19 +30,19 @@ def test_reorder_pyscf_gpr_orca(): dm_pyscf = from_tril(np.load(path+'/data/reorder/2_3FOD.pyscf.dm.npy')) dm_gpr1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'gpr') - assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) + assert (np.linalg.norm(dm_gpr1-dm_gpr)==0) dm_gpr1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'gpr') - assert(np.linalg.norm(dm_gpr1-dm_gpr)==0) + assert (np.linalg.norm(dm_gpr1-dm_gpr)==0) dm_pyscf1 = reorder.reorder_ao(mol, dm_orca, 'orca', 'pyscf') - assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) + assert (np.linalg.norm(dm_pyscf1-dm_pyscf)==0) dm_pyscf1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'pyscf') - assert(np.linalg.norm(dm_pyscf1-dm_pyscf)==0) + assert (np.linalg.norm(dm_pyscf1-dm_pyscf)==0) dm_orca1 = reorder.reorder_ao(mol, dm_pyscf, 'pyscf', 'orca') - assert(np.linalg.norm(dm_orca1-dm_orca)==0) + assert (np.linalg.norm(dm_orca1-dm_orca)==0) dm_orca1 = reorder.reorder_ao(mol, dm_gpr, 'gpr', 'orca') - assert(np.linalg.norm(dm_orca1-dm_orca)==0) + assert (np.linalg.norm(dm_orca1-dm_orca)==0) if __name__ == '__main__': diff --git a/tests/test_rxn-repr.py b/tests/test_rxn-repr.py index d5381736..75e04c07 100755 --- a/tests/test_rxn-repr.py +++ b/tests/test_rxn-repr.py @@ -27,6 +27,7 @@ def read_mols(files): mol.set_positions(mol.positions*ase.units.Bohr) sub_mols.append(mol) return sub_mols + def get_data(): indices = np.loadtxt(idx_path, dtype=int) reactions = [] @@ -40,8 +41,12 @@ def get_data(): def test_b2r2_l(): _test_b2r2('l') + + def test_b2r2_a(): _test_b2r2('a') + + def test_b2r2_n(): _test_b2r2('n') @@ -51,7 +56,7 @@ def _test_b2r2(variant): reactions = Rxn_data(data_dir=data_dir).get_gdb7_data() b2r2_1 = b2r2.get_b2r2(reactions, variant=variant) b2r2_0 = np.load(f'{data_dir}/b2r2_{variant}.npy') - assert(np.linalg.norm(b2r2_1-b2r2_0) < 1e-10) + assert (np.linalg.norm(b2r2_1-b2r2_0) < 1e-10) def test_slatm_rxn(): @@ -59,7 +64,7 @@ def test_slatm_rxn(): reactions = Rxn_data(data_dir=data_dir).get_gdb7_data() slatm_1 = slatm.get_slatm_rxn(reactions, qml_mbtypes=True, progress=False) slatm_0 = np.load(f'{data_dir}/slatm_d.npy') - assert(np.linalg.norm(slatm_1-slatm_0) < 1e-10) + assert (np.linalg.norm(slatm_1-slatm_0) < 1e-10) if __name__ == '__main__': diff --git a/tests/test_slatm.py b/tests/test_slatm.py index c120d1cb..afa35107 100755 --- a/tests/test_slatm.py +++ b/tests/test_slatm.py @@ -11,7 +11,7 @@ def test_slatm_global(): v0 = np.load(f'{path}/data/slatm/slatm_global.npy') xyzs = sorted(glob.glob(f"{path}/data/slatm/*.xyz")) v = slatm.get_slatm_for_dataset(xyzs, progress=False, global_repr=True) - assert(np.linalg.norm(v-v0)<1e-10) + assert (np.linalg.norm(v-v0)<1e-10) def test_slatm_local(): @@ -19,7 +19,7 @@ def test_slatm_local(): v0 = np.load(f'{path}/data/slatm/slatm_local.npy') xyzs = sorted(glob.glob(f"{path}/data/slatm/*.xyz")) v = slatm.get_slatm_for_dataset(xyzs, progress=False) - assert(np.linalg.norm(v-v0)<1e-10) + assert (np.linalg.norm(v-v0)<1e-10) if __name__ == '__main__': diff --git a/tests/test_spahm.py b/tests/test_spahm.py index eb96619a..2990b9b0 100755 --- a/tests/test_spahm.py +++ b/tests/test_spahm.py @@ -13,8 +13,8 @@ def test_spahm_GWH(): R = compute_spahm.get_spahm_representation(mol, 'gwh') true_R = np.array([[-33.02835203, -8.92909895, -8.00935971, -7.51145492, -7.32962602], [-33.02835203, -8.92909895, -8.00935971, -7.51145492, 0. ]]) - assert(R.shape == (2,5)) - assert(np.allclose(R, true_R)) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_huckel(): @@ -23,8 +23,8 @@ def test_spahm_huckel(): R = compute_spahm.get_spahm_representation(mol, 'huckel') true_R = np.array([[-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531], [-20.78722617, -1.29750913, -0.51773954, -0.4322361 , -0.40740531]]) - assert(R.shape == (2,5)) - assert(np.allclose(R, true_R)) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_LB(): @@ -33,8 +33,8 @@ def test_spahm_LB(): R = compute_spahm.get_spahm_representation(mol, 'lb') true_R = np.array( [[-18.80209878, -1.28107468, -0.79949967, -0.63587071, -0.57481672], [-18.80209878, -1.28107468, -0.79949967, -0.63587071, 0. ]]) - assert(R.shape == (2,5)) - assert(np.allclose(R, true_R)) + assert (R.shape == (2,5)) + assert (np.allclose(R, true_R)) def test_spahm_LB_ecp(): @@ -74,7 +74,7 @@ def test_generate_reps(): xmols = [compute_spahm.get_spahm_representation(mol, 'lb')[0] for mol in mols] X = vstack_padding(xmols) Xtrue = np.load(os.path.join(path, 'X_lb.npy')) - assert(np.allclose(X, Xtrue)) + assert (np.allclose(X, Xtrue)) if __name__ == '__main__': diff --git a/tests/test_spahm_a.py b/tests/test_spahm_a.py index 5268f284..b057ed6a 100755 --- a/tests/test_spahm_a.py +++ b/tests/test_spahm_a.py @@ -7,12 +7,18 @@ PATH = os.path.dirname(os.path.realpath(__file__)) -def underlying_test(true_data_relpath, X): + +def underlying_test(true_data_relpath, X, trim=False, only_z=None): X_true = np.load(PATH+true_data_relpath, allow_pickle=True) - assert(X.shape == X_true.shape) - for a, a_true in zip(X, X_true, strict=True): - assert(a[0] == a_true[0]) # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + if only_z is not None: + X_true = X_true[np.isin(X_true[:,0], only_z)] + assert X.shape == X_true.shape + for (q, v), (q_true, v_true) in zip(X, X_true, strict=True): + assert q == q_true + if trim is True: + v = np.trim_zeros(v) # short-version vectors should be trimmed + assert np.allclose(v, v_true) + def test_water(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) @@ -21,20 +27,20 @@ def test_water(): model='lowdin-long-x', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X) + def test_water_alternate(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) - #X = atom.get_repr(mol, ["H", "O"], None, dm=None, - # guess='LB', model='lowdin-long-x', auxbasis='ccpvdzjkfit') X = atom.get_repr("atom", [mol], [PATH], 'LB', spin=[None], auxbasis='ccpvdzjkfit', with_symbols=True) underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X) + def test_water_lowdinshortx(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='lowdin-short-x', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! - underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short-x.npy', X) + underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short-x.npy', X, trim=True) + def test_water_lowdinlong(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) @@ -43,40 +49,64 @@ def test_water_lowdinlong(): model='lowdin-long', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-long.npy', X) + def test_water_lowdinshort(): mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, model='lowdin-short', auxbasis='ccpvdzjkfit') - X = np.array([(z,np.trim_zeros(v)) for z,v in X], dtype=object) ## trimming is necessary to get the short-version vector ! - underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', X) + underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', X, trim=True) + + +def test_water_mr21(): + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', + elements=["H", "O"], spin=None, with_symbols=True, + model='MR2021', auxbasis='ccpvdzjkfit') + underlying_test('/data/SPAHM_a_H2O/X_H2O_MR2021.npy', X, trim=True) + def test_water_SAD_guess_open_shell(): - mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=1, spin=1) ## test breaks when effective open-shell caluclation is needed + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=1, spin=1) Xsad = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=[1], with_symbols=True, - xc = 'hf', model='lowdin-long-x', auxbasis='ccpvdzjkfit') + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O-RC_SAD.npy', Xsad) + def test_water_SAD_guess_close_shell(): - mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) ## test breaks when effective open-shell caluclation is needed + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) Xsad = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', elements=["H", "O"], spin=None, with_symbols=True, - xc = 'hf', model='lowdin-long-x', auxbasis='ccpvdzjkfit') + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit') underlying_test('/data/SPAHM_a_H2O/X_H2O_SAD.npy', Xsad) + def test_water_single_element(): + only_z = ['O'] mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', elements=["H", "O"], spin=None, with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only - X_true = np.load(PATH+'/data/SPAHM_a_H2O/X_H2O.npy', allow_pickle=True) - # the next two lines deviate from the common template - a = X[0] - assert(X.shape == np.array(X_true[0], ndmin=2).shape) - for a_true in X_true: - if a[0] == a_true[0]: # atom type - assert(np.linalg.norm(a[1]-a_true[1]) < 1e-08) # atom representations + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=only_z) + underlying_test('/data/SPAHM_a_H2O/X_H2O.npy', X, only_z=only_z) + + +def test_water_single_element_short(): + only_z = ['O'] + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'minao', charge=0, spin=None) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'LB', + elements=["H", "O"], spin=None, with_symbols=True, + model='lowdin-short', auxbasis='ccpvdzjkfit', only_z=only_z) + underlying_test('/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy', X, only_z=only_z) + + +def test_water_single_element_SAD(): + only_z = ['O'] + mol = compound.xyz_to_mol(PATH+'/data/H2O.xyz', 'sto3g', charge=0, spin=0) + X = atom.get_repr("atom", [mol], [PATH+'/data/H2O.xyz'], 'sad', + elements=["H", "O"], spin=None, with_symbols=True, + xc='hf', model='sad-diff', auxbasis='ccpvdzjkfit', only_z=only_z) + underlying_test('/data/SPAHM_a_H2O/X_H2O_SAD.npy', X, only_z=only_z) if __name__ == '__main__': @@ -88,3 +118,6 @@ def test_water_single_element(): test_water_SAD_guess_close_shell() test_water_SAD_guess_open_shell() test_water_single_element() + test_water_single_element_short() + test_water_mr21() + test_water_single_element_SAD() diff --git a/tests/test_spahm_b.py b/tests/test_spahm_b.py index 552f00fe..9624ba9f 100755 --- a/tests/test_spahm_b.py +++ b/tests/test_spahm_b.py @@ -7,12 +7,14 @@ PATH = os.path.dirname(os.path.realpath(__file__)) + def underlying_test(X, truepath): true_file = PATH + truepath X_true = np.load(true_file) - assert(X_true.shape == X.shape) + assert (X_true.shape == X.shape) for Xa, Xa_true in zip(X, X_true, strict=True): - assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + assert (np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + def test_water(): xyz_in = PATH+'/data/H2O.xyz' @@ -21,45 +23,50 @@ def test_water(): underlying_test(X, '/data/H2O_spahm_b.npy_alpha_beta.npy') + def test_water_closed(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [None], [0], 'minao') X = bond.get_repr("bond", mols, [xyz_in], 'LB', spin=[None], with_symbols=False, same_basis=False) underlying_test(X, '/data/H2O_spahm_b.npy') + def test_water_O_only(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, only_z=['O']) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main X_true = np.load(PATH+'/data/H2O_spahm_b.npy_alpha_beta.npy') X_true = X_true[0] # this line makes it incompatible with a call to underlying_test() - assert(X_true.shape == X.shape) + assert (X_true.shape == X.shape) for Xa, Xa_true in zip(X, X_true, strict=True): - assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + assert (np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8) + def test_water_same_basis(): xyz_in = PATH+'/data/H2O.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, same_basis=True) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main underlying_test(X, '/data/H2O_spahm_b_CCbas.npy_alpha_beta.npy') + def test_ecp(): xyz_in = PATH+'/data/I2.xyz' mols = utils.load_mols([xyz_in], [0], [0], 'minao', ecp='def2-svp') dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0]) X = bond.spahm_a_b("bond", mols, dms, same_basis=True) - X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat) - X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main + X = np.squeeze(X) # contains a single elements but has shape (1,Nfeat) + X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main underlying_test(X, '/data/I2_spahm-b_minao-def2-svp_alpha-beta.npy') + def test_repr_shapes(): xyz_in = [PATH+'/data/H2O.xyz', PATH+'/data/HO_spinline.xyz'] mols = utils.load_mols(xyz_in, [0,-1], [0,0], 'ccpvdz') @@ -102,7 +109,7 @@ def test_from_list(): mols = utils.load_mols(xyzlist, charges, spins, 'minao', srcdir=PATH+"/data/") spahm_b = bond.get_repr("bond", mols, xyzlist, 'LB', spin=spins, same_basis=True) Xtrue = np.load(PATH+'/data/list_H2O_spahm-b_minao_LB_alpha-beta.npy') - assert(np.allclose(Xtrue, spahm_b)) + assert (np.allclose(Xtrue, spahm_b)) if __name__ == '__main__': @@ -113,4 +120,3 @@ def test_from_list(): test_ecp() test_repr_shapes() test_from_list() - diff --git a/tests/test_spahm_b_selected.py b/tests/test_spahm_b_selected.py index 329d8245..c52b8e3b 100755 --- a/tests/test_spahm_b_selected.py +++ b/tests/test_spahm_b_selected.py @@ -5,6 +5,7 @@ from qstack import compound from qstack.spahm.rho.bond_selected import get_spahm_b_selected + def test_spahm_b_selected(): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data/') fname = os.path.join(path, 'H2O.xyz') @@ -12,7 +13,7 @@ def test_spahm_b_selected(): mols = [compound.xyz_to_mol(fname, basis='minao', charge=0, spin=0)] X = get_spahm_b_selected(mols, bondij, [fname])[0][1] Xtrue = np.load(os.path.join(path, 'H2O.xyz_1_2.npy')) - assert(np.allclose(X, Xtrue)) + assert (np.allclose(X, Xtrue)) if __name__ == '__main__': diff --git a/tests/test_spahm_grad.py b/tests/test_spahm_grad.py index 4114d500..77722c5f 100755 --- a/tests/test_spahm_grad.py +++ b/tests/test_spahm_grad.py @@ -46,7 +46,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_grad(): @@ -60,7 +60,7 @@ def spahm_re(r, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_re, mol, guess).reshape(mol.natm*3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_ev_grad_ecp(): @@ -74,7 +74,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_ev_grad_field(): @@ -89,7 +89,7 @@ def spahm_ev(r, mol, guess): agrad = spahm.compute_spahm.get_guess_orbitals_grad(mol, guess, field=field)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_ev, mol, guess).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_grad_field(): @@ -105,7 +105,7 @@ def spahm_re(r, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess, field=field)[1].reshape(-1, mol.natm*3) ngrad = grad_num(spahm_re, mol, guess).reshape(mol.natm*3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) def test_spahm_re_field_grad(): @@ -119,7 +119,7 @@ def spahm_re(field, mol, guess_in): agrad = spahm.compute_spahm.get_spahm_representation_grad(mol, guess, field=field)[2].reshape(-1, 3) ngrad = derivatives_num(field, spahm_re, mol, guess).reshape(3, -1).T for g1, g2 in zip(ngrad, agrad, strict=True): - assert(np.linalg.norm(g1-g2)<1e-6) + assert (np.linalg.norm(g1-g2)<1e-6) if __name__ == '__main__': diff --git a/tests/test_splitting.py b/tests/test_splitting.py index f14fb187..909a3cc4 100755 --- a/tests/test_splitting.py +++ b/tests/test_splitting.py @@ -11,19 +11,22 @@ spin_list = os.path.join(path, "data", 'list_water_spins.txt') charge_list = os.path.join(path, "data", 'list_water_charges.txt') + def test_no_split(): nameout = tempfile.mktemp() sufix = "_alpha_beta.npy" rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout]) reps = np.load(nameout+sufix) - assert(reps.shape == (9,414)) + assert (reps.shape == (9,414)) + def test_split_once(): nameout = tempfile.mktemp() sufix = "_alpha_beta.npy" rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout, '--split']) - reps = np.load(nameout+sufix, allow_pickle=True) ## why is the `dtype` object ???? - assert(reps.shape == (3, 3, 414)) + reps = np.load(nameout+sufix, allow_pickle=True) # why is the `dtype` object ???? + assert (reps.shape == (3, 3, 414)) + def test_split_twice(): nameout = tempfile.mktemp() @@ -31,8 +34,8 @@ def test_split_twice(): rep_files = [nameout+"_"+os.path.basename(f).split(".")[0]+sufix for f in np.loadtxt(mol_list, dtype=str)] rho.main(['--rep', 'atom', '--mol', mol_list, '--spin', spin_list, '--charge', charge_list, '--name', nameout, '--split', "--split"]) for f in rep_files: - reps = np.load(f, allow_pickle=True) ## why is the `dtype` object ???? - assert(reps.shape == (3, 414)) + reps = np.load(f, allow_pickle=True) # why is the `dtype` object ???? + assert (reps.shape == (3, 414)) if __name__ == '__main__': diff --git a/tests/test_utils.py b/tests/test_utils.py index a789a75f..81085a67 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,29 +12,32 @@ def test_load_rep_from_list(): path = os.path.dirname(os.path.realpath(__file__)) paths2list = os.path.join(path, 'data/SPAHM_a_H2O/') - Xarray, symbols = ut.load_reps(paths2list+'reps_list.txt', from_list=True, \ - with_labels=True, local=True, sum_local=False, printlevel=0, progress=True, \ + Xarray, symbols = ut.load_reps(paths2list+'reps_list.txt', from_list=True, + with_labels=True, local=True, sum_local=False, printlevel=0, progress=True, srcdir=paths2list) - assert(Xarray.shape == (9,207)) - assert(len(symbols) == 9) + assert (Xarray.shape == (9,207)) + assert (len(symbols) == 9) + def test_load_reps(): path = os.path.dirname(os.path.realpath(__file__)) paths2X = os.path.join(path, 'data/SPAHM_a_H2O/X_H2O.npy') - X, symbols = ut.load_reps(paths2X, from_list=False, \ + X, symbols = ut.load_reps(paths2X, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (3,207)) - assert(len(symbols) == 3) + assert (X.shape == (3,207)) + assert (len(symbols) == 3) + -def test_load_reps_nosymbols(): #throws warning and returns empty list of symbols +def test_load_reps_nosymbols(): # throws warning and returns empty list of symbols path = os.path.dirname(os.path.realpath(__file__)) paths2X = os.path.join(path, 'data/H2O_spahm_b.npy_alpha_beta.npy') - X, symbols = ut.load_reps(paths2X, from_list=False, \ + X, symbols = ut.load_reps(paths2X, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (3,1108)) - assert(len(symbols) == 0) + assert (X.shape == (3,1108)) + assert (len(symbols) == 0) + def test_load_reps_singleatom(): path = os.path.dirname(os.path.realpath(__file__)) @@ -44,13 +47,14 @@ def test_load_reps_singleatom(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X, symbols = ut.load_reps(tmpfile, from_list=False, \ + X, symbols = ut.load_reps(tmpfile, from_list=False, with_labels=True, local=True, sum_local=False, printlevel=0, progress=True) - assert(X.shape == (1,414)) - assert(len(symbols) == 1) - assert(symbols[0] == 'O') + assert (X.shape == (1,414)) + assert (len(symbols) == 1) + assert (symbols[0] == 'O') + def test_load_reps_singleatom_sum_local(): path = os.path.dirname(os.path.realpath(__file__)) @@ -60,11 +64,12 @@ def test_load_reps_singleatom_sum_local(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X = ut.load_reps(tmpfile, from_list=False, \ + X = ut.load_reps(tmpfile, from_list=False, with_labels=False, local=True, sum_local=True, printlevel=0, progress=True) - assert(X.shape == (1,414)) + assert (X.shape == (1,414)) + def test_load_reps_singleatom_sum_local2(): path = os.path.dirname(os.path.realpath(__file__)) @@ -74,30 +79,33 @@ def test_load_reps_singleatom_sum_local2(): mol = compound.xyz_to_mol(xyzpath, basis="minao", charge=0, spin=0, ignore=False, unit='ANG', ecp=None) rep = atom.get_repr("atom", [mol], [xyzpath], 'LB', elements=["H", "O"], spin=[0], with_symbols=True, - model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) #requesting reps for O-atom only + model='lowdin-long-x', auxbasis='ccpvdzjkfit', only_z=['O']) # requesting reps for O-atom only np.save(tmpfile, rep) - X = ut.load_reps(tmpfile, from_list=False, \ + X = ut.load_reps(tmpfile, from_list=False, with_labels=False, local=True, sum_local=True, printlevel=0, progress=True) - assert(X.shape == (1,414)) + assert (X.shape == (1,414)) + def test_load_mols(): path = os.path.dirname(os.path.realpath(__file__)) molslist = [os.path.join(path, 'data', m) for m in ['H2O.xyz','H2O_dist.xyz','rotated_H2O.xyz']] mols = ut.load_mols(molslist, [0]*len(molslist), [None]*len(molslist), 'minao', progress=True) - assert(len(mols) == 3) + assert (len(mols) == 3) + def test_check_data_structure(): path = os.path.dirname(os.path.realpath(__file__)) test_files = [ - {'path2file': os.path.join(path, 'data', 'H2O_spahm-e_def2svp.npy'), 'is_local':False, 'is_single':True, 'is_labeled':False}, \ - {'path2file': os.path.join(path, 'data', 'H2O_spahm_b.npy_alpha_beta.npy'), 'is_local':True, 'is_single':True, 'is_labeled':False}, \ - {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/X_H2O.npy'), 'is_local':True, 'is_single':True, 'is_labeled':True}, \ - {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/Xs_H2O_array.npy'), 'is_local':True, 'is_single':False, 'is_labeled':True} \ + {'path2file': os.path.join(path, 'data', 'H2O_spahm-e_def2svp.npy'), 'is_local':False, 'is_single':True, 'is_labeled':False}, + {'path2file': os.path.join(path, 'data', 'H2O_spahm_b.npy_alpha_beta.npy'), 'is_local':True, 'is_single':True, 'is_labeled':False}, + {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/X_H2O.npy'), 'is_local':True, 'is_single':True, 'is_labeled':True}, + {'path2file': os.path.join(path, 'data', 'SPAHM_a_H2O/Xs_H2O_array.npy'), 'is_local':True, 'is_single':False, 'is_labeled':True}, ] for ft in test_files: - is_single, is_labeled = ut.check_data_struct(ft['path2file'], local = ft['is_local']) - assert(ft['is_single'] == is_single) - assert(ft['is_labeled'] == is_labeled) + is_single, is_labeled = ut.check_data_struct(ft['path2file'], local=ft['is_local']) + assert (ft['is_single'] == is_single) + assert (ft['is_labeled'] == is_labeled) + def test_regroup_symbols(): path = os.path.dirname(os.path.realpath(__file__)) @@ -106,19 +114,20 @@ def test_regroup_symbols(): rep_count = {"H":2, "O":1} print(regrouped_species) for z,v in regrouped_species.items(): - assert(len(v) == rep_count[z]) + assert (len(v) == rep_count[z]) + def test_regroup_symbols_and_trim(): path = os.path.dirname(os.path.realpath(__file__)) filelist = os.path.join(path, "./data/list_water_lowdin-short-padded.txt") regrouped_species = ut.regroup_symbols(filelist, trim_reps=True) - #trimedlist = os.path.join(path, "./data/list_water_lowdin-short.txt") ## this is not possible because of inhomogenous array + # trimedlist = os.path.join(path, "./data/list_water_lowdin-short.txt") ## this is not possible because of inhomogenous array X_truth = np.load(path+"/data/SPAHM_a_H2O/X_H2O_lowdin-short.npy", allow_pickle=True) regrouped_truth = {z:[] for z in regrouped_species} for z,v in X_truth: regrouped_truth[z].append(v) for z in regrouped_species: - assert(np.allclose(regrouped_species[z], regrouped_truth[z])) + assert (np.allclose(regrouped_species[z], regrouped_truth[z])) if __name__ == '__main__':