ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
phi_operator.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "phi_operator.h"
6
7namespace ModuleGint
8{
9
10template<typename T>
11void PhiOperator::set_phi(T* phi) const
12{
13 for(int i = 0; i < biggrid_->get_atoms_num(); ++i)
14 {
15 const auto atom = biggrid_->get_atom(i);
16 atom->set_phi(atom_rcoords_[i], cols_, phi);
17 phi += atom->get_nw();
18 }
19}
20
21// Helpers to dispatch a Tin-typed BLAS-GEMM target buffer:
22// - For Tin=double, write the GEMM result directly into the caller's
23// double* phi_dm (no scratch, no cast).
24// - For Tin=float, allocate a fp32 scratch buffer and cast the result into
25// phi_dm at the end. K of each individual GEMM is small (~atom nw) so
26// accumulating in fp32 inside the scratch is fine.
27inline double* phi_mul_dm_scratch_(double* phi_dm, std::vector<double>& /*scratch*/, int /*size*/)
28{
29 return phi_dm;
30}
31
32template<typename Tin>
33inline Tin* phi_mul_dm_scratch_(double* /*phi_dm*/, std::vector<Tin>& scratch, int size)
34{
35 scratch.assign(size, Tin(0));
36 return scratch.data();
37}
38
39inline void phi_mul_dm_finalize_(double* /*phi_dm*/, const std::vector<double>& /*scratch*/, int /*size*/) {}
40
41template<typename Tin>
42inline void phi_mul_dm_finalize_(double* phi_dm, const std::vector<Tin>& scratch, int size)
43{
44 for (int k = 0; k < size; ++k)
45 {
46 phi_dm[k] = static_cast<double>(scratch[k]);
47 }
48}
49
50// phi_dm(ir,iwt_2) = \sum_{iwt_1} phi(ir,iwt_1) * dm(iwt_1,iwt_2)
51template<typename Tin>
53 const Tin*const phi, // phi(ir,iwt)
54 const HContainer<Tin>& dm, // dm(iwt_1,iwt_2)
55 const bool is_symm,
56 double*const phi_dm) const // phi_dm(ir,iwt)
57{
58 std::vector<Tin> scratch;
59 Tin* target = phi_mul_dm_scratch_(phi_dm, scratch, rows_ * cols_);
60 ModuleBase::GlobalFunc::ZEROS(target, rows_ * cols_);
61
62 for(int i = 0; i < biggrid_->get_atoms_num(); ++i)
63 {
64 const auto atom_i = biggrid_->get_atom(i);
65 const auto r_i = atom_i->get_R();
66
67 if(is_symm)
68 {
69 const auto dm_mat = dm.find_matrix(atom_i->get_iat(), atom_i->get_iat(), 0, 0, 0);
70 constexpr Tin alpha = 1.0;
71 constexpr Tin beta = 1.0;
73 'L', 'U',
74 atoms_phi_len_[i], rows_,
75 alpha, dm_mat->get_pointer(), atoms_phi_len_[i],
76 &phi[0 * cols_ + atoms_startidx_[i]], cols_,
77 beta, &target[0 * cols_ + atoms_startidx_[i]], cols_);
78 }
79
80 const int start = is_symm ? i + 1 : 0;
81
82 for(int j = start; j < biggrid_->get_atoms_num(); ++j)
83 {
84 const auto atom_j = biggrid_->get_atom(j);
85 const auto r_j = atom_j->get_R();
86 // FIXME may be r = r_j - r_i
87 const auto dm_mat = dm.find_matrix(atom_i->get_iat(), atom_j->get_iat(), r_i-r_j);
88
89 // if dm_mat is nullptr, it means this atom pair does not affect any meshgrid in the unitcell
90 if(dm_mat == nullptr)
91 {
92 continue;
93 }
94
95 const int start_idx = get_atom_pair_start_end_idx_(i, j).first;
96 const int end_idx = get_atom_pair_start_end_idx_(i, j).second;
97 const int len = end_idx - start_idx + 1;
98
99 // if len<=0, it means this atom pair does not affect any meshgrid in this biggrid
100 if(len <= 0)
101 {
102 continue;
103 }
104
105 const Tin alpha = is_symm ? 2.0 : 1.0;
106 constexpr Tin beta = 1.0;
108 'N', 'N',
109 len, atoms_phi_len_[j], atoms_phi_len_[i],
110 alpha, &phi[start_idx * cols_ + atoms_startidx_[i]], cols_,
111 dm_mat->get_pointer(), atoms_phi_len_[j],
112 beta, &target[start_idx * cols_ + atoms_startidx_[j]], cols_);
113 }
114 }
115
116 phi_mul_dm_finalize_(phi_dm, scratch, rows_ * cols_);
117}
118
119// result(ir) = phi(ir) * vl(ir)
120template<typename T>
122 const T*const vl, // vl(ir)
123 const T dr3,
124 const T*const phi, // phi(ir,iwt)
125 T*const result) const // result(ir,iwt)
126{
127 int idx = 0;
128 for(int i = 0; i < biggrid_->get_mgrids_num(); i++)
129 {
130 T vldr3_mgrid = vl[mgrid_lidx_[i]] * dr3;
131 for(int j = 0; j < cols_; j++)
132 {
133 result[idx] = phi[idx] * vldr3_mgrid;
134 idx++;
135 }
136 }
137}
138
139// hr(iwt_i,iwt_j) += \sum_{ir} phi_i(ir,iwt_i) * phi_i(ir,iwt_j)
140// this is a thread-safe function.
141// The per-biggrid GEMM accumulator tmp_hr stays in Tin (small K, no significant
142// precision loss); only the global add into HContainer<double> is widened.
143template<typename Tin>
145 const Tin*const phi_i, // phi_i(ir,iwt)
146 const Tin*const phi_j, // phi_j(ir,iwt)
147 HContainer<double>& hr, // hr(iwt_i,iwt_j)
148 const TriPart part) const
149{
150 std::vector<Tin> tmp_hr;
151 for(int i = 0; i < biggrid_->get_atoms_num(); ++i)
152 {
153 const auto atom_i = biggrid_->get_atom(i);
154 const auto& r_i = atom_i->get_R();
155 const int iat_i = atom_i->get_iat();
156 const int n_i = atoms_phi_len_[i];
157
158 for(int j = 0; j < biggrid_->get_atoms_num(); ++j)
159 {
160 const auto atom_j = biggrid_->get_atom(j);
161 const auto& r_j = atom_j->get_R();
162 const int iat_j = atom_j->get_iat();
163 const int n_j = atoms_phi_len_[j];
164
165 // only calculate the upper triangle matrix
166 if(part==TriPart::Upper && iat_i>iat_j)
167 {
168 continue;
169 }
170 // only calculate the upper triangle matrix
171 else if(part==TriPart::Lower && iat_i<iat_j)
172 {
173 continue;
174 }
175
176 // FIXME may be r = r_j - r_i
177 const auto result = hr.find_matrix(iat_i, iat_j, r_i-r_j);
178
179 if(result == nullptr)
180 {
181 continue;
182 }
183
184 const int start_idx = get_atom_pair_start_end_idx_(i, j).first;
185 const int end_idx = get_atom_pair_start_end_idx_(i, j).second;
186 const int len = end_idx - start_idx + 1;
187
188 if(len <= 0)
189 {
190 continue;
191 }
192
193 tmp_hr.resize(n_i * n_j);
194 ModuleBase::GlobalFunc::ZEROS(tmp_hr.data(), n_i*n_j);
195
196 constexpr Tin alpha=1, beta=1;
198 'T', 'N', n_i, n_j, len,
199 alpha, phi_i + start_idx * cols_ + atoms_startidx_[i], cols_,
200 phi_j + start_idx * cols_ + atoms_startidx_[j], cols_,
201 beta, tmp_hr.data(), n_j,
203
204 result->add_array_ts(tmp_hr.data());
205 }
206 }
207}
208
209// Mixed-precision dotc wrapper. Accepts (double, double) or (double, float);
210// when y is fp32 it is upcast into the caller-provided fp64 scratch buffer
211// before dispatching to BlasConnector::dotc (which requires uniform types).
212// The buffer is grown on demand and meant to be reused across many calls.
213inline double dotc_mixed(int n, const double* x, const double* y,
214 std::vector<double>& /*buf*/)
215{
216 return BlasConnector::dotc(n, x, 1, y, 1);
217}
218
219inline double dotc_mixed(int n, const double* x, const float* y,
220 std::vector<double>& buf)
221{
222 if (static_cast<int>(buf.size()) < n) { buf.resize(n); }
223 for (int k = 0; k < n; ++k) { buf[k] = static_cast<double>(y[k]); }
224 return BlasConnector::dotc(n, x, 1, buf.data(), 1);
225}
226
227// rho(ir) = \sum_{iwt} \phi_i(ir,iwt) * \phi_j^*(ir,iwt)
228// phi_j is always double (output of phi_mul_dm); phi_i may be fp32. dotc_mixed
229// keeps the inner product in fp64 in either case.
230template<typename Tin>
232 const Tin*const phi_i, // phi_i(ir,iwt)
233 const double*const phi_j, // phi_j(ir,iwt)
234 double*const rho) const // rho(ir)
235{
236 std::vector<double> buf;
237 for(int i = 0; i < biggrid_->get_mgrids_num(); ++i)
238 {
239 rho[mgrid_lidx_[i]] += dotc_mixed(
240 cols_, phi_j + i * cols_, phi_i + i * cols_, buf);
241 }
242}
243
244} // namespace ModuleGint
const std::complex< double > i
Definition cal_pLpR.cpp:46
static float dotc(const int n, const float *const X, const int incX, const float *const Y, const int incY, base_device::AbacusDevice_t device_type=base_device::AbacusDevice_t::CpuDevice)
Definition blas_connector_vector.cpp:224
static void gemm(const char transa, const char transb, const int m, const int n, const int k, const float alpha, const float *a, const int lda, const float *b, const int ldb, const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type=base_device::AbacusDevice_t::CpuDevice)
Definition blas_connector_matrix.cpp:26
static void symm_cm(const char side, const char uplo, const int m, const int n, const float alpha, const float *a, const int lda, const float *b, const int ldb, const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type=base_device::AbacusDevice_t::CpuDevice)
Definition blas_connector_matrix.cpp:375
void phi_mul_vldr3(const T *const vl, const T dr3, const T *const phi, T *const result) const
Definition phi_operator.hpp:121
std::shared_ptr< const BigGrid > biggrid_
Definition phi_operator.h:153
void phi_dot_phi(const Tin *const phi_i, const double *const phi_j, double *const rho) const
Definition phi_operator.hpp:231
const std::pair< int, int > & get_atom_pair_start_end_idx_(int a, int b) const
Definition phi_operator.h:129
void phi_mul_phi(const Tin *const phi_i, const Tin *const phi_j, HContainer< double > &hr, const TriPart part) const
Definition phi_operator.hpp:144
std::vector< std::vector< Vec3d > > atom_rcoords_
Definition phi_operator.h:157
TriPart
Definition phi_operator.h:22
std::vector< int > atoms_phi_len_
Definition phi_operator.h:169
void phi_mul_dm(const Tin *const phi, const HContainer< Tin > &dm, const bool is_symm, double *const phi_dm) const
Definition phi_operator.hpp:52
std::vector< int > atoms_startidx_
Definition phi_operator.h:164
void set_phi(T *phi) const
Definition phi_operator.hpp:11
int cols_
Definition phi_operator.h:147
Definition hcontainer.h:144
BaseMatrix< T > * find_matrix(int i, int j, int rx, int ry, int rz)
find BaseMatrix with atom index atom_i and atom_j and R index (rx, ry, rz) This interface can be used...
Definition hcontainer.cpp:286
#define T
Definition exp.cpp:237
void ZEROS(std::complex< T > *u, const TI n)
Definition global_function.h:109
Definition batch_biggrid.cpp:4
void phi_mul_dm_finalize_(double *, const std::vector< double > &, int)
Definition phi_operator.hpp:39
double * phi_mul_dm_scratch_(double *phi_dm, std::vector< double > &, int)
Definition phi_operator.hpp:27
double dotc_mixed(int n, const double *x, const double *y, std::vector< double > &)
Definition phi_operator.hpp:213
@ CpuDevice
Definition types.h:14
iclock::time_point start
Definition test_partition.cpp:22