deepmodeling
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 6 additions & 0 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎source/source_base/module_external/lapack_connector.h‎
Lines changed: 18 additions & 5 deletions b/‎source/source_base/module_external/lapack_connector.h‎
Lines changed: 18 additions & 5 deletions
diff --git a/‎source/source_hamilt/module_xc/exx_info.h‎
Lines changed: 1 addition & 0 deletions b/‎source/source_hamilt/module_xc/exx_info.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎source/source_io/input_conv.cpp‎
Lines changed: 1 addition & 0 deletions b/‎source/source_io/input_conv.cpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎source/source_io/module_parameter/input_parameter.h‎
Lines changed: 1 addition & 0 deletions b/‎source/source_io/module_parameter/input_parameter.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎source/source_io/read_input_item_exx_dftu.cpp‎
Lines changed: 6 additions & 0 deletions b/‎source/source_io/read_input_item_exx_dftu.cpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎source/source_lcao/module_ri/ABFs_Construct-PCA.cpp‎
Lines changed: 152 additions & 65 deletions b/‎source/source_lcao/module_ri/ABFs_Construct-PCA.cpp‎
Lines changed: 152 additions & 65 deletions
@@ -2979,6 +2979,12 @@ These variables are relevant when using hybrid functionals with *[basis_type](#b
 - **Description**: See also the entry [exx_pca_threshold](#exx_pca_threshold). Smaller components (less than exx_c_threshold) of the $C^{a}_{ik}$ matrix are neglected to accelerate calculation. The larger the threshold is, the faster the calculation and the lower the accuracy. A relatively safe choice of the value is 1e-4.
 - **Default**: 1E-4
 
+### exx_cs_inv_thr
+
+- **Type**: Real
+- **Description**: By default, the Coulomb matrix inversion required for obtaining LRI coefficients is performed using LU decomposition. However, this approach may suffer from numerical instabilities when a large set of auxiliary basis functions (ABFs) is employed. When `exx_cs_inv_thr > 0`, the inversion is instead carried out via matrix diagonalization. Eigenvalues smaller than `exx_cs_inv_thr` are discarded to improve numerical stability. A relatively safe and commonly recommended value is `1e-5`.
+- **Default**: -1
+
 ### exx_v_threshold
 
 - **Type**: Real
 
@@ -170,11 +170,24 @@ void zhegv_(const int* itype, const char* jobz, const char* uplo, const int* n,
 
 // === Standard Hermitian eigenproblem ===
 
-void dsyev_(const char* jobz, const char* uplo, const int* n,
-            double* a, const int* lda,
-            double* w,
-            double* work, const int* lwork,
-            int* info);
+    void ssyev_(const char* jobz,
+                const char* uplo,
+                const int* n,
+                float* a,
+                const int* lda,
+                float* w,
+                float* work,
+                const int* lwork,
+                int* info);
+    void dsyev_(const char* jobz,
+                const char* uplo,
+                const int* n,
+                double* a,
+                const int* lda,
+                double* w,
+                double* work,
+                const int* lwork,
+                int* info);
 
 void cheev_(const char* jobz, const char* uplo, const int* n,
             std::complex<float>* a, const int* lda,
 
@@ -67,6 +67,7 @@ struct Exx_Info
         double ccp_rmesh_times = 10;
         bool exx_symmetry_realspace = true;
         double kmesh_times = 4;
+        double Cs_inv_thr = -1;
 
         int abfs_Lmax = 0; // tmp
 
 
@@ -474,6 +474,7 @@ void Input_Conv::Convert()
         GlobalC::exx_info.info_ri.V_grad_R_threshold = PARAM.inp.exx_v_grad_r_threshold;
         GlobalC::exx_info.info_ri.ccp_rmesh_times = std::stod(PARAM.inp.exx_ccp_rmesh_times);
         GlobalC::exx_info.info_ri.exx_symmetry_realspace = PARAM.inp.exx_symmetry_realspace;
+        GlobalC::exx_info.info_ri.Cs_inv_thr = PARAM.inp.exx_cs_inv_thr;
 
         GlobalC::exx_info.info_opt_abfs.pca_threshold = PARAM.inp.exx_pca_threshold;
         GlobalC::exx_info.info_opt_abfs.abfs_Lmax  = PARAM.inp.exx_opt_orb_lmax;
 
@@ -553,6 +553,7 @@ struct Input_para
         = true; ///< whether to reduce the real-space sector in when using symmetry=1 in EXX calculation
     double rpa_ccp_rmesh_times = 10.0; ///< how many times larger the radial mesh required for
                                        ///< calculating Columb potential is to that of atomic orbitals
+    double exx_cs_inv_thr = -1;        ///< threshold to inverse Vq in abfs for generating Cs
     bool out_ri_cv = false; ///< Whether to output the coefficient tensor C and ABFs-representation Coulomb matrix V
     // ==============   #Parameters (16.dft+u) ======================
     //    DFT+U       Xin Qu added on 2020-10-29
 
@@ -243,6 +243,12 @@ void ReadInput::item_exx()
         read_sync_double(input.exx_pca_threshold);
         this->add_item(item);
     }
+    {
+        Input_Item item("exx_cs_inv_thr");
+        item.annotation = "threshold to inverse Vq in abfs for generating Cs";
+        read_sync_double(input.exx_cs_inv_thr);
+        this->add_item(item);
+    }
     {
         Input_Item item("exx_c_threshold");
         item.annotation = "threshold to screen C matrix in exx";
 
@@ -14,139 +14,226 @@ namespace ABFs_Construct
 {
 namespace PCA
 {
-	void tensor_dsyev(const char jobz, const char uplo, RI::Tensor<double> & a, double*const w, int & info)
+template <>
+void tensor_syev<double>(char jobz, char uplo, RI::Tensor<double>& a, double* w, int& info)
 	{
-		// reference: dsyev in lapack_connector.h (for ModuleBase::matrix)
 		assert(a.shape.size() == 2);
 		assert(a.shape[0] == a.shape[1]);
-		const int nr = a.shape[0];
-		const int nc = a.shape[1];
 
-		double work_tmp=0.0;
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    double work_query = 0.0;
 		constexpr int minus_one = -1;
-		dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, &work_tmp, &minus_one, &info);		// get best lwork
 
-		const int lwork = work_tmp;
+    dsyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, &info);
+
+    const int lwork = static_cast<int>(work_query);
 		std::vector<double> work(std::max(1, lwork));
-		dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, work.data(), &lwork, &info);
+
+    dsyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, &info);
+}
+
+template <>
+void tensor_syev<float>(char jobz, char uplo, RI::Tensor<float>& a, float* w, int& info)
+{
+    assert(a.shape.size() == 2);
+    assert(a.shape[0] == a.shape[1]);
+
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    float work_query = 0.0f;
+    constexpr int minus_one = -1;
+
+    ssyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, &info);
+
+    const int lwork = static_cast<int>(work_query);
+    std::vector<float> work(std::max(1, lwork));
+
+    ssyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, &info);
 	}
 
-	RI::Tensor<double> get_sub_matrix( 
-		const RI::Tensor<double> & m,		// size: (lcaos, lcaos, abfs)
-		const std::size_t & T,
-		const std::size_t & L,
-		const ModuleBase::Element_Basis_Index::Range & range,
-		const ModuleBase::Element_Basis_Index::IndexLNM & index )
+template <>
+void tensor_syev<std::complex<double>>(char jobz, char uplo, RI::Tensor<std::complex<double>>& a, double* w, int& info)
+{
+    assert(a.shape.size() == 2);
+    assert(a.shape[0] == a.shape[1]);
+
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    std::complex<double> work_query;
+    constexpr int minus_one = -1;
+
+    zheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, nullptr, &info);
+
+    const int lwork = static_cast<int>(work_query.real());
+    std::vector<std::complex<double>> work(std::max(1, lwork));
+    std::vector<double> rwork(std::max(1, 3 * n - 2));
+
+    zheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, rwork.data(), &info);
+}
+
+template <>
+void tensor_syev<std::complex<float>>(char jobz, char uplo, RI::Tensor<std::complex<float>>& a, float* w, int& info)
+{
+    assert(a.shape.size() == 2);
+    assert(a.shape[0] == a.shape[1]);
+
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    std::complex<float> work_query;
+    constexpr int minus_one = -1;
+
+    cheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, nullptr, &info);
+
+    const int lwork = static_cast<int>(work_query.real());
+    std::vector<std::complex<float>> work(std::max(1, lwork));
+    std::vector<float> rwork(std::max(1, 3 * n - 2));
+
+    cheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, rwork.data(), &info);
+}
+// void tensor_dsyev(const char jobz, const char uplo, RI::Tensor<double> & a, double*const w, int & info)
+// {
+// 	// reference: dsyev in lapack_connector.h (for ModuleBase::matrix)
+// 	assert(a.shape.size() == 2);
+// 	assert(a.shape[0] == a.shape[1]);
+// 	const int nr = a.shape[0];
+// 	const int nc = a.shape[1];
+
+// 	double work_tmp=0.0;
+// 	constexpr int minus_one = -1;
+// 	dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, &work_tmp, &minus_one, &info);		// get best lwork
+
+// 	const int lwork = work_tmp;
+// 	std::vector<double> work(std::max(1, lwork));
+// 	dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, work.data(), &lwork, &info);
+// }
+
+RI::Tensor<double> get_sub_matrix(const RI::Tensor<double>& m, // size: (lcaos, lcaos, abfs)
+                                  const std::size_t& T,
+                                  const std::size_t& L,
+                                  const ModuleBase::Element_Basis_Index::Range& range,
+                                  const ModuleBase::Element_Basis_Index::IndexLNM& index)
 	{
 		ModuleBase::TITLE("ABFs_Construct::PCA::get_sub_matrix");		
 		assert(m.shape.size() == 3);
-		RI::Tensor<double> m_sub({ m.shape[0], m.shape[1], range[T][L].N });
-		for (std::size_t ir=0; ir!=m.shape[0]; ++ir) {
-			for (std::size_t jr=0; jr!=m.shape[1]; ++jr) {
-				for (std::size_t N=0; N!=range[T][L].N; ++N) {
+    RI::Tensor<double> m_sub({m.shape[0], m.shape[1], range[T][L].N});
+    for (std::size_t ir = 0; ir != m.shape[0]; ++ir)
+    {
+        for (std::size_t jr = 0; jr != m.shape[1]; ++jr)
+        {
+            for (std::size_t N = 0; N != range[T][L].N; ++N)
+            {
 					m_sub(ir, jr, N) = m(ir, jr, index[T][L][N][0]);
 }
 }
 }
-		m_sub = m_sub.reshape({ m.shape[0] * m.shape[1], range[T][L].N });
+    m_sub = m_sub.reshape({m.shape[0] * m.shape[1], range[T][L].N});
 		return m_sub;
 	}
 
-	RI::Tensor<double> get_column_mean0_matrix( const RI::Tensor<double> & m )
+RI::Tensor<double> get_column_mean0_matrix(const RI::Tensor<double>& m)
 	{
 		ModuleBase::TITLE("ABFs_Construct::PCA::get_column_mean0_matrix");
-		RI::Tensor<double> m_new( m.shape);
-		for( std::size_t ic=0; ic!=m.shape[1]; ++ic )
+    RI::Tensor<double> m_new(m.shape);
+    for (std::size_t ic = 0; ic != m.shape[1]; ++ic)
 		{
-			double sum=0;
-			for( std::size_t ir=0; ir!=m.shape[0]; ++ir ) {
-				sum += m(ir,ic);
+        double sum = 0;
+        for (std::size_t ir = 0; ir != m.shape[0]; ++ir)
+        {
+            sum += m(ir, ic);
 }
-			const double mean = sum/m.shape[0];
-			for( std::size_t ir=0; ir!=m.shape[0]; ++ir ) {
-				m_new(ir,ic) = m(ir,ic) - mean;
+        const double mean = sum / m.shape[0];
+        for (std::size_t ir = 0; ir != m.shape[0]; ++ir)
+        {
+            m_new(ir, ic) = m(ir, ic) - mean;
 }
 		}
 		return m_new;
 	}
 
 	std::vector<std::vector<std::pair<std::vector<double>, RI::Tensor<double>>>> cal_PCA(
-		const UnitCell &ucell,
+    const UnitCell& ucell,
         const LCAO_Orbitals& orb,
-		const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> &lcaos, 
-		const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> &abfs,
-		const double kmesh_times )
+    const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>>& lcaos,
+    const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>>& abfs,
+    const double kmesh_times)
 	{
 		ModuleBase::TITLE("ABFs_Construct::PCA::cal_PCA");
 
-		const ModuleBase::Element_Basis_Index::Range
-			range_lcaos = ModuleBase::Element_Basis_Index::construct_range( lcaos );
-		const ModuleBase::Element_Basis_Index::IndexLNM
-			index_lcaos = ModuleBase::Element_Basis_Index::construct_index( range_lcaos );
+    const ModuleBase::Element_Basis_Index::Range range_lcaos = ModuleBase::Element_Basis_Index::construct_range(lcaos);
+    const ModuleBase::Element_Basis_Index::IndexLNM index_lcaos
+        = ModuleBase::Element_Basis_Index::construct_index(range_lcaos);
 
-		const ModuleBase::Element_Basis_Index::Range
-			range_abfs = ModuleBase::Element_Basis_Index::construct_range( abfs );
-		const ModuleBase::Element_Basis_Index::IndexLNM
-			index_abfs = ModuleBase::Element_Basis_Index::construct_index( range_abfs );
+    const ModuleBase::Element_Basis_Index::Range range_abfs = ModuleBase::Element_Basis_Index::construct_range(abfs);
+    const ModuleBase::Element_Basis_Index::IndexLNM index_abfs
+        = ModuleBase::Element_Basis_Index::construct_index(range_abfs);
 
 		const int Lmax_bak = GlobalC::exx_info.info_ri.abfs_Lmax;
 		GlobalC::exx_info.info_ri.abfs_Lmax = std::numeric_limits<int>::min();
-		for( std::size_t T=0; T!=abfs.size(); ++T ) {
-			GlobalC::exx_info.info_ri.abfs_Lmax = std::max( GlobalC::exx_info.info_ri.abfs_Lmax, static_cast<int>(abfs[T].size())-1 );
+    for (std::size_t T = 0; T != abfs.size(); ++T)
+    {
+        GlobalC::exx_info.info_ri.abfs_Lmax
+            = std::max(GlobalC::exx_info.info_ri.abfs_Lmax, static_cast<int>(abfs[T].size()) - 1);
 }
 
 		Matrix_Orbs21 m_abfslcaos_lcaos;
 		ORB_gaunt_table MGT;
 		int Lmax;
-		m_abfslcaos_lcaos.init( 1, ucell , orb, kmesh_times, orb.get_Rmax(), Lmax );
+    m_abfslcaos_lcaos.init(1, ucell, orb, kmesh_times, orb.get_Rmax(), Lmax);
 		MGT.init_Gaunt_CH(Lmax);
         MGT.init_Gaunt(Lmax);
-		m_abfslcaos_lcaos.init_radial( abfs, lcaos, lcaos, MGT );
+    m_abfslcaos_lcaos.init_radial(abfs, lcaos, lcaos, MGT);
 
-		std::map<std::size_t,std::map<std::size_t,std::set<double>>> delta_R;
-		for( std::size_t it=0; it!=abfs.size(); ++it ) {
+    std::map<std::size_t, std::map<std::size_t, std::set<double>>> delta_R;
+    for (std::size_t it = 0; it != abfs.size(); ++it)
+    {
 			delta_R[it][it] = {0.0};
 }
 		m_abfslcaos_lcaos.init_radial_table(delta_R);
 
 		GlobalC::exx_info.info_ri.abfs_Lmax = Lmax_bak;
 
-		std::vector<std::vector<std::pair<std::vector<double>,RI::Tensor<double>>>> eig(abfs.size());
-		for( std::size_t T=0; T!=abfs.size(); ++T )
+    std::vector<std::vector<std::pair<std::vector<double>, RI::Tensor<double>>>> eig(abfs.size());
+    for (std::size_t T = 0; T != abfs.size(); ++T)
 		{
-			const RI::Tensor<double> A = m_abfslcaos_lcaos.cal_overlap_matrix<double>(  
-				T, 
+        const RI::Tensor<double> A = m_abfslcaos_lcaos.cal_overlap_matrix<double>(T,
 				T, 
-				ModuleBase::Vector3<double>{0,0,0},
-				ModuleBase::Vector3<double>{0,0,0},  
+                                                                                  ModuleBase::Vector3<double>{0, 0, 0},
+                                                                                  ModuleBase::Vector3<double>{0, 0, 0},
 				index_abfs, 
 				index_lcaos,
 				index_lcaos,
 				Matrix_Orbs21::Matrix_Order::A2BA1);
 
 			eig[T].resize(abfs[T].size());
-			for( std::size_t L=0; L!=abfs[T].size(); ++L )
+        for (std::size_t L = 0; L != abfs[T].size(); ++L)
 			{
-				const RI::Tensor<double> A_sub = get_sub_matrix( A, T, L, range_abfs, index_abfs );
+            const RI::Tensor<double> A_sub = get_sub_matrix(A, T, L, range_abfs, index_abfs);
 				RI::Tensor<double> mm = A_sub.transpose() * A_sub;
 				std::vector<double> eig_value(mm.shape[0]);
 
-				int info=1;
+            int info = 1;
 
-				tensor_dsyev('V', 'L', mm, eig_value.data(), info);
+            tensor_syev<double>('V', 'L', mm, eig_value.data(), info);
 
-				if( info )
+            if (info)
 				{
 					std::cout << std::endl << "info_dsyev = " << info << std::endl;
-					auto tensor_print = [](RI::Tensor<double>& m, std::ostream& os, const double threshold)
-					{
+                auto tensor_print = [](RI::Tensor<double>& m, std::ostream& os, const double threshold) {
 						for (int ir = 0; ir != m.shape[0]; ++ir)
 						{
 							for (int ic = 0; ic != m.shape[1]; ++ic)
 							{
-								if (std::abs(m(ir, ic)) > threshold) {
+                            if (std::abs(m(ir, ic)) > threshold)
+                            {
 									os << m(ir, ic) << "\t";
-								} else {
+                            }
+                            else
+                            {
 									os << 0 << "\t";
 }
 							}
@@ -155,15 +242,15 @@ namespace PCA
 						os << std::endl;
 					};
 					tensor_print(mm, GlobalV::ofs_warning, 0.0);
-					std::cout<<"in file "<<__FILE__<<" line "<<__LINE__<<std::endl;
+                std::cout << "in file " << __FILE__ << " line " << __LINE__ << std::endl;
 					ModuleBase::QUIT();
 				}
-				eig[T][L] = std::make_pair( eig_value, mm );
+            eig[T][L] = std::make_pair(eig_value, mm);
 			}
 		}
 
 		return eig;
 	}
 
-}	// namespace ABFs_Construct::PCA
-}	// namespace ABFs_Construct
+} // namespace PCA
+} // namespace ABFs_Construct
Original file line number	Diff line number	Diff line change
`@@ -243,6 +243,12 @@ void ReadInput::item_exx()`
`243`	`243`	`read_sync_double(input.exx_pca_threshold);`
`244`	`244`	`this->add_item(item);`
`245`	`245`	`}`
	`246`	`+ {`
	`247`	`+ Input_Item item("exx_cs_inv_thr");`
	`248`	`+ item.annotation = "threshold to inverse Vq in abfs for generating Cs";`
	`249`	`+ read_sync_double(input.exx_cs_inv_thr);`
	`250`	`+ this->add_item(item);`
	`251`	`+ }`
`246`	`252`	`{`
`247`	`253`	`Input_Item item("exx_c_threshold");`
`248`	`254`	`item.annotation = "threshold to screen C matrix in exx";`