ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
nonlocal_maths.hpp
Go to the documentation of this file.
1#ifndef HAMILTPW_NONLOCAL_MATHS_H
2#define HAMILTPW_NONLOCAL_MATHS_H
3
6#include "source_cell/klist.h"
11
12namespace hamilt
13{
14
15template <typename FPTYPE, typename Device>
17{
18 public:
19 Nonlocal_maths(const pseudopot_cell_vnl* nlpp_in, const UnitCell* ucell_in)
20 {
21 this->device = base_device::get_device_type<Device>(this->ctx);
22 this->nhtol_ = nlpp_in->nhtol;
23 this->lmax_ = nlpp_in->lmaxkb;
24 this->ucell_ = ucell_in;
25 }
26 Nonlocal_maths(const ModuleBase::matrix& nhtol, const int lmax, const UnitCell* ucell_in)
27 {
28 this->device = base_device::get_device_type<Device>(this->ctx);
29 this->nhtol_ = nhtol;
30 this->lmax_ = lmax;
31 this->ucell_ = ucell_in;
32 }
33
34 private:
36 int lmax_;
38
39 Device* ctx = {};
40 base_device::DEVICE_CPU* cpu_ctx = {};
42
43 public:
44 // functions
55 std::vector<FPTYPE> cal_gk(int ik, const ModulePW::PW_Basis_K* pw_basis);
65 void cal_ylm(int lmax, int npw, const FPTYPE* gk_in, FPTYPE* ylm);
67 void cal_ylm_deri(int lmax, int npw, const FPTYPE* gk_in, FPTYPE* ylm_deri);
69 std::vector <std::complex<FPTYPE>> cal_pref(int it, const int nh);
72 void cal_vkb(int it,
73 int ia,
74 int npw,
75 const FPTYPE* vq_in,
76 const FPTYPE* ylm_in,
77 const std::complex<FPTYPE>* sk_in,
78 const std::complex<FPTYPE>* pref_in,
79 std::complex<FPTYPE>* vkb_out);
81 void cal_vkb_deri(int it,
82 int ia,
83 int npw,
84 int ipol,
85 int jpol,
86 const FPTYPE* vq_in,
87 const FPTYPE* vq_deri_in,
88 const FPTYPE* ylm_in,
89 const FPTYPE* ylm_deri_in,
90 const std::complex<FPTYPE>* sk_in,
91 const std::complex<FPTYPE>* pref_in,
92 const FPTYPE* gk_in,
93 std::complex<FPTYPE>* vkb_out);
94
96 void prepare_vkb_ptr(int nbeta,
97 double* nhtol,
98 int nhtol_nc,
99 int npw,
100 int it,
101 std::complex<FPTYPE>* vkb_out,
102 std::complex<FPTYPE>** vkb_ptrs,
103 FPTYPE* ylm_in,
104 FPTYPE** ylm_ptrs,
105 FPTYPE* vq_in,
106 FPTYPE** vq_ptrs);
107
110 void cal_dvkb_index(const int nbeta,
111 const double* nhtol,
112 const int nhtol_nc,
113 const int npw,
114 const int it,
115 const int ipol,
116 const int jpol,
117 int* indexes);
118
119 static void dylmr2(const int nylm, const int ngy, const FPTYPE* gk, FPTYPE* dylm, const int ipol);
121 static FPTYPE Polynomial_Interpolation_nl(const ModuleBase::realArray& table,
122 const int& dim1,
123 const int& dim2,
124 const FPTYPE& table_interval,
125 const FPTYPE& x);
126};
127
128// prepare a memory block containing information of vector G+k, this function can be named as eval_q or eval_gk
129// seems this operation is not on gpu
130template <typename FPTYPE, typename Device>
131std::vector<FPTYPE> Nonlocal_maths<FPTYPE, Device>::cal_gk(int ik, const ModulePW::PW_Basis_K* pw_basis)
132{
133 int npw = pw_basis->npwk[ik];
134 std::vector<FPTYPE> gk(npw * 5);
136 for (int ig = 0; ig < npw; ++ig)
137 {
138 // written in memory block from 0 to 3*npw. This is like a matrix with npw rows and 3 columns
139 q = pw_basis->getgpluskcar(ik, ig);
140 gk[ig * 3] = q.x;
141 gk[ig * 3 + 1] = q.y;
142 gk[ig * 3 + 2] = q.z;
143 // the following written in memory block from 3*npw to 5*npw, the excess 2*npw is for norm and 1/norm
144 // for memory consecutive consideration, there are blocks storing the norm and 1/norm.
145 FPTYPE norm = sqrt(q.norm2());
146 gk[3 * npw + ig] = norm * this->ucell_->tpiba; // one line with length npw, storing the norm
147 gk[4 * npw + ig] = norm < 1e-8 ? 0.0 : 1.0 / norm * this->ucell_->tpiba; // one line with length npw, storing 1/norm
148 }
149 return gk;
150}
151
152// tabulate the spherical haromonic functions up to lmax. The q vector is given as input.
153// I would rather call this function as cal_ylm_cpu2gpu, distincting from the pure cpu implementation
154template <typename FPTYPE, typename Device>
155void Nonlocal_maths<FPTYPE, Device>::cal_ylm(int lmax, int npw, const FPTYPE* q, FPTYPE* ylm)
156{
157 const int ntot_ylm = (lmax + 1) * (lmax + 1);
158 if (this->device == base_device::GpuDevice)
159 {
160 // alias
162 // allocate
163 std::vector<FPTYPE> ylm_cpu(ntot_ylm * npw);
164 // calculate
165 ModuleBase::YlmReal::Ylm_Real(cpu_ctx, ntot_ylm, npw, q, ylm_cpu.data());
166 // send from cpu to gpu
167 syncmem_var_h2d_op()(ylm, ylm_cpu.data(), ylm_cpu.size());
168 }
169 else
170 {
171 // calculate. Why not implement this logic branch inside some function???
172 ModuleBase::YlmReal::Ylm_Real(cpu_ctx, ntot_ylm, npw, q, ylm);
173 }
174 return;
175}
176
177// this function calculate the numerical derivate of the spherical harmonic functions respect to the G vector...
178// maybe called eval_dylmdq_cpu2gpu?
179template <typename FPTYPE, typename Device>
180void Nonlocal_maths<FPTYPE, Device>::cal_ylm_deri(int lmax, int npw, const FPTYPE* q, FPTYPE* out)
181{
182 const int ntot_ylm = (lmax + 1) * (lmax + 1);
183
184 if (this->device == base_device::GpuDevice)
185 {
186 // alias
188 // allocate
189 std::vector<FPTYPE> dylmdq_cpu(3 * ntot_ylm * npw);
190 // calculate
191 for (int ipol = 0; ipol < 3; ipol++)
192 {
193 Nonlocal_maths<FPTYPE, Device>::dylmr2(ntot_ylm, npw, q, &dylmdq_cpu[ipol * ntot_ylm * npw], ipol);
194 }
195 // send from cpu to gpu
196 syncmem_var_h2d_op()(out, dylmdq_cpu.data(), dylmdq_cpu.size());
197 }
198 else
199 {
200 for (int ipol = 0; ipol < 3; ipol++)
201 {
202 Nonlocal_maths<FPTYPE, Device>::dylmr2(ntot_ylm, npw, q, &out[ipol * ntot_ylm * npw], ipol);
203 }
204 }
205
206 return;
207}
208// cal_pref
209template <typename FPTYPE, typename Device>
210std::vector<std::complex<FPTYPE>> Nonlocal_maths<FPTYPE, Device>::cal_pref(int it, const int nh)
211{
212 // nh is the total number of m-channels of the beta functions
213 // for example, if angular momentum of beta functions are 0, 0, 1, 1, 1, 1, the nh will be
214 // 1 + 1 + 3 + 3 + 3 + 3 = 14
215 std::vector<std::complex<FPTYPE>> pref(nh);
216 for (int ih = 0; ih < nh; ih++)
217 {
218 pref[ih] = std::pow(std::complex<FPTYPE>(0.0, -1.0), this->nhtol_(it, ih));
219 // it is actually nh2l, which means to get the angular momentum...
220 }
221 return pref;
222}
223
224// cal_vkb
225// cpu version first, gpu version later
226template <typename FPTYPE, typename Device>
228 int ia,
229 int npw,
230 const FPTYPE* vq_in,
231 const FPTYPE* ylm_in,
232 const std::complex<FPTYPE>* sk_in,
233 const std::complex<FPTYPE>* pref_in,
234 std::complex<FPTYPE>* vkb_out)
235{
236 int ih = 0;
237 // loop over all beta functions
238 for (int ib = 0; ib < this->ucell_->atoms[it].ncpp.nbeta; ib++)
239 {
240 int l = this->nhtol_(it, ih);
241 // loop over all m angular momentum
242 for (int m = 0; m < 2 * l + 1; m++)
243 {
244 int lm = l * l + m;
245 std::complex<FPTYPE>* vkb_ptr = &vkb_out[ih * npw];
246 const FPTYPE* ylm_ptr = &ylm_in[lm * npw];
247 const FPTYPE* vq_ptr = &vq_in[ib * npw];
248 // loop over all G-vectors
249 for (int ig = 0; ig < npw; ig++)
250 {
251 vkb_ptr[ig] = ylm_ptr[ig] * vq_ptr[ig] * sk_in[ig] * pref_in[ih];
252 }
253 ih++;
254 }
255 }
256}
257
258// cal_vkb
259// cpu version first, gpu version later
260template <typename FPTYPE, typename Device>
262 int ia,
263 int npw,
264 int ipol,
265 int jpol,
266 const FPTYPE* vq_in,
267 const FPTYPE* vq_deri_in,
268 const FPTYPE* ylm_in,
269 const FPTYPE* ylm_deri_in,
270 const std::complex<FPTYPE>* sk_in,
271 const std::complex<FPTYPE>* pref_in,
272 const FPTYPE* gk_in,
273 std::complex<FPTYPE>* vkb_out)
274{
275 const int x1 = (this->lmax_ + 1) * (this->lmax_ + 1);
276 int ih = 0;
277 // loop over all beta functions
278 for (int nb = 0; nb < this->ucell_->atoms[it].ncpp.nbeta; nb++)
279 {
280 const int l = this->nhtol_(it, ih);
281 // loop over all m angular momentum
282 for (int m = 0; m < 2 * l + 1; m++)
283 {
284 const int lm = l * l + m;
285 std::complex<FPTYPE>* vkb_ptr = &vkb_out[ih * npw];
286 const FPTYPE* ylm_ptr = &ylm_in[lm * npw];
287 const FPTYPE* vq_ptr = &vq_in[nb * npw];
288 // set vkb to zero
289 for (int ig = 0; ig < npw; ig++)
290 {
291 vkb_ptr[ig] = std::complex<FPTYPE>(0.0, 0.0);
292 }
293 // first term: ylm * vq * sk * pref
294 // loop over all G-vectors
295 if (ipol == jpol)
296 {
297 for (int ig = 0; ig < npw; ig++)
298 {
299 vkb_ptr[ig] -= ylm_ptr[ig] * vq_ptr[ig] * sk_in[ig] * pref_in[ih];
300 }
301 }
302 // second term: ylm_deri * vq_deri * sk * pref
303 // loop over all G-vectors
304 const FPTYPE* ylm_deri_ptr1 = &ylm_deri_in[(ipol * x1 + lm) * npw];
305 const FPTYPE* ylm_deri_ptr2 = &ylm_deri_in[(jpol * x1 + lm) * npw];
306 const FPTYPE* vq_deri_ptr = &vq_deri_in[nb * npw];
307 const FPTYPE* qnorm = &gk_in[4 * npw];
308 for (int ig = 0; ig < npw; ig++)
309 {
310 vkb_ptr[ig] -= (gk_in[ig * 3 + ipol] * ylm_deri_ptr2[ig] + gk_in[ig * 3 + jpol] * ylm_deri_ptr1[ig])
311 * vq_ptr[ig] * sk_in[ig] * pref_in[ih];
312 }
313 // third term: ylm * vq_deri * sk * pref
314 // loop over all G-vectors
315 for (int ig = 0; ig < npw; ig++)
316 {
317 vkb_ptr[ig] -= 2.0 * ylm_ptr[ig] * vq_deri_ptr[ig] * sk_in[ig] * pref_in[ih] * gk_in[ig * 3 + ipol]
318 * gk_in[ig * 3 + jpol] * qnorm[ig];
319 }
320 ih++;
321 }
322 }
323}
324
325template <typename FPTYPE, typename Device>
327 double* nhtol,
328 int nhtol_nc,
329 int npw,
330 int it,
331 std::complex<FPTYPE>* vkb_out,
332 std::complex<FPTYPE>** vkb_ptrs,
333 FPTYPE* ylm_in,
334 FPTYPE** ylm_ptrs,
335 FPTYPE* vq_in,
336 FPTYPE** vq_ptrs)
337{
338 // std::complex<FPTYPE>** vkb_ptrs[nh];
339 // const FPTYPE** ylm_ptrs[nh];
340 // const FPTYPE** vq_ptrs[nh];
341 int ih = 0;
342 for (int nb = 0; nb < nbeta; nb++)
343 {
344 int l = nhtol[it * nhtol_nc + ih];
345 for (int m = 0; m < 2 * l + 1; m++)
346 {
347 int lm = l * l + m;
348 vkb_ptrs[ih] = &vkb_out[ih * npw];
349 ylm_ptrs[ih] = &ylm_in[lm * npw];
350 vq_ptrs[ih] = &vq_in[nb * npw];
351 ih++;
352 }
353 }
354}
355
356template <typename FPTYPE, typename Device>
358 const double* nhtol,
359 const int nhtol_nc,
360 const int npw,
361 const int it,
362 const int ipol,
363 const int jpol,
364 int* indexes)
365{
366 int ih = 0;
367 const int x1 = (this->lmax_ + 1) * (this->lmax_ + 1);
368 for (int nb = 0; nb < nbeta; nb++)
369 {
370 int l = nhtol[it * nhtol_nc + ih];
371 for (int m = 0; m < 2 * l + 1; m++)
372 {
373 //std::cout << "in function cal_dvkb_index, nhtol(" << it << ", " << ih << ") = " << l << std::endl;
374 int lm = l * l + m;
375 indexes[ih * 4] = lm; // the index of ylm matrix, for given l and m, together with ig to get value
376 indexes[ih * 4 + 1] = nb; // the iproj of present atom type
377 indexes[ih * 4 + 2] = (ipol * x1 + lm);
378 indexes[ih * 4 + 3] = (jpol * x1 + lm);
379
380 ih++;
381 }
382 }
383}
384
385template <typename FPTYPE, typename Device>
387 const int ngy,
388 const FPTYPE* gk,
389 FPTYPE* dylm,
390 const int ipol)
391{
392 //-----------------------------------------------------------------------
393 //
394 // compute \partial Y_lm(G) \over \partial (G)_ipol
395 // using simple numerical derivation (SdG)
396 // The spherical harmonics are calculated in ylmr2
397 //
398 // int nylm, ngy, ipol;
399 // number of spherical harmonics
400 // the number of g vectors to compute
401 // desired polarization
402 // FPTYPE g (3, ngy), gg (ngy), dylm (ngy, nylm)
403 // the coordinates of g vectors
404 // the moduli of g vectors
405 // the spherical harmonics derivatives
406 //
407 const FPTYPE delta = 1e-6;
408 const FPTYPE small = 1e-15;
409
410 ModuleBase::matrix ylmaux;
411 // dg is the finite increment for numerical derivation:
412 // dg = delta |G| = delta * sqrt(gg)
413 // dgi= 1 /(delta * sqrt(gg))
414 // gx = g +/- dg
415
416 std::vector<FPTYPE> gx(ngy * 3);
417
418 std::vector<FPTYPE> dg(ngy);
419 std::vector<FPTYPE> dgi(ngy);
420
421 ylmaux.create(nylm, ngy);
422
423 ModuleBase::GlobalFunc::ZEROS(dylm, nylm * ngy);
424 ylmaux.zero_out();
425
426#ifdef _OPENMP
427#pragma omp parallel for
428#endif
429 for (int ig = 0; ig < 3 * ngy; ig++)
430 {
431 gx[ig] = gk[ig];
432 }
433 //$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ig)
434#ifdef _OPENMP
435#pragma omp parallel for
436#endif
437 for (int ig = 0; ig < ngy; ig++)
438 {
439 const int igx = ig * 3, igy = ig * 3 + 1, igz = ig * 3 + 2;
440 FPTYPE norm2 = gx[igx] * gx[igx] + gx[igy] * gx[igy] + gx[igz] * gx[igz];
441 dg[ig] = delta * sqrt(norm2);
442 if (dg[ig] > small)
443 {
444 dgi[ig] = 1.0 / dg[ig];
445 }
446 else
447 {
448 dgi[ig] = 0.0;
449 }
450 }
451 //$OMP END PARALLEL DO
452
453 //$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ig)
454#ifdef _OPENMP
455#pragma omp parallel for
456#endif
457 for (int ig = 0; ig < ngy; ig++)
458 {
459 const int index = ig * 3 + ipol;
460 gx[index] = gk[index] + dg[ig];
461 }
462 //$OMP END PARALLEL DO
463
464 base_device::DEVICE_CPU* cpu = {};
465 ModuleBase::YlmReal::Ylm_Real(cpu, nylm, ngy, gx.data(), dylm);
466 //$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ig)
467#ifdef _OPENMP
468#pragma omp parallel for
469#endif
470 for (int ig = 0; ig < ngy; ig++)
471 {
472 const int index = ig * 3 + ipol;
473 gx[index] = gk[index] - dg[ig];
474 }
475 //$OMP END PARALLEL DO
476
477 ModuleBase::YlmReal::Ylm_Real(cpu, nylm, ngy, gx.data(), ylmaux.c);
478
479 // zaxpy ( - 1.0, ylmaux, 1, dylm, 1);
480#ifdef _OPENMP
481#pragma omp parallel for collapse(2)
482#endif
483 for (int lm = 0; lm < nylm; lm++)
484 {
485 for (int ig = 0; ig < ngy; ig++)
486 {
487 dylm[lm * ngy + ig] -= ylmaux(lm, ig);
488 dylm[lm * ngy + ig] *= 0.5 * dgi[ig];
489 }
490 }
491
492 return;
493}
494
495template <typename FPTYPE, typename Device>
497 const int& dim1,
498 const int& dim2,
499 const FPTYPE& table_interval,
500 const FPTYPE& x // input value
501)
502{
503
504 assert(table_interval > 0.0);
505 const FPTYPE position = x / table_interval;
506 const int iq = static_cast<int>(position);
507
508 const FPTYPE x0 = position - static_cast<FPTYPE>(iq);
509 const FPTYPE x1 = 1.0 - x0;
510 const FPTYPE x2 = 2.0 - x0;
511 const FPTYPE x3 = 3.0 - x0;
512 const FPTYPE y = (table(dim1, dim2, iq) * (-x2 * x3 - x1 * x3 - x1 * x2) / 6.0
513 + table(dim1, dim2, iq + 1) * (+x2 * x3 - x0 * x3 - x0 * x2) / 2.0
514 - table(dim1, dim2, iq + 2) * (+x1 * x3 - x0 * x3 - x0 * x1) / 2.0
515 + table(dim1, dim2, iq + 3) * (+x1 * x2 - x0 * x2 - x0 * x1) / 6.0)
516 / table_interval;
517
518 return y;
519}
520} // namespace hamilt
521
522#endif
3 elements vector
Definition vector3.h:22
T norm2(void) const
Get the square of nomr of a Vector3.
Definition vector3.h:177
T x
Definition vector3.h:24
T y
Definition vector3.h:25
T z
Definition vector3.h:26
static void Ylm_Real(const int lmax2, const int ng, const ModuleBase::Vector3< double > *g, matrix &ylm)
spherical harmonic function (real form) an array of vectors
Definition math_ylmreal.cpp:357
Definition matrix.h:19
void zero_out(void)
Definition matrix.cpp:281
double * c
Definition matrix.h:25
void create(const int nrow, const int ncol, const bool flag_zero=true)
Definition matrix.cpp:122
double float array
Definition realarray.h:21
Special pw_basis class. It includes different k-points.
Definition pw_basis_k.h:57
int * npwk
Definition pw_basis_k.h:78
ModuleBase::Vector3< double > getgpluskcar(const int ik, const int igl) const
Definition pw_basis_k.cpp:390
Definition unitcell.h:16
Definition nonlocal_maths.hpp:17
static void dylmr2(const int nylm, const int ngy, const FPTYPE *gk, FPTYPE *dylm, const int ipol)
Definition nonlocal_maths.hpp:386
void cal_ylm_deri(int lmax, int npw, const FPTYPE *gk_in, FPTYPE *ylm_deri)
calculate the derivate of the sperical bessel function for projections
Definition nonlocal_maths.hpp:180
base_device::DEVICE_CPU * cpu_ctx
Definition nonlocal_maths.hpp:40
void cal_vkb(int it, int ia, int npw, const FPTYPE *vq_in, const FPTYPE *ylm_in, const std::complex< FPTYPE > *sk_in, const std::complex< FPTYPE > *pref_in, std::complex< FPTYPE > *vkb_out)
Definition nonlocal_maths.hpp:227
Nonlocal_maths(const ModuleBase::matrix &nhtol, const int lmax, const UnitCell *ucell_in)
Definition nonlocal_maths.hpp:26
static FPTYPE Polynomial_Interpolation_nl(const ModuleBase::realArray &table, const int &dim1, const int &dim2, const FPTYPE &table_interval, const FPTYPE &x)
polynomial interpolation tool for calculate derivate of vq
Definition nonlocal_maths.hpp:496
int lmax_
Definition nonlocal_maths.hpp:36
base_device::AbacusDevice_t device
Definition nonlocal_maths.hpp:41
void prepare_vkb_ptr(int nbeta, double *nhtol, int nhtol_nc, int npw, int it, std::complex< FPTYPE > *vkb_out, std::complex< FPTYPE > **vkb_ptrs, FPTYPE *ylm_in, FPTYPE **ylm_ptrs, FPTYPE *vq_in, FPTYPE **vq_ptrs)
calculate the ptr used in vkb_op
Definition nonlocal_maths.hpp:326
const UnitCell * ucell_
Definition nonlocal_maths.hpp:37
void cal_dvkb_index(const int nbeta, const double *nhtol, const int nhtol_nc, const int npw, const int it, const int ipol, const int jpol, int *indexes)
Definition nonlocal_maths.hpp:357
std::vector< std::complex< FPTYPE > > cal_pref(int it, const int nh)
calculate the (-i)^l factors
Definition nonlocal_maths.hpp:210
Device * ctx
Definition nonlocal_maths.hpp:39
Nonlocal_maths(const pseudopot_cell_vnl *nlpp_in, const UnitCell *ucell_in)
Definition nonlocal_maths.hpp:19
void cal_ylm(int lmax, int npw, const FPTYPE *gk_in, FPTYPE *ylm)
calculate the real spherical harmonic functions on cpu (and optionally send to gpu,...
Definition nonlocal_maths.hpp:155
ModuleBase::matrix nhtol_
Definition nonlocal_maths.hpp:35
std::vector< FPTYPE > cal_gk(int ik, const ModulePW::PW_Basis_K *pw_basis)
this function prepares all the q (G+k) information in one contiguous memory block including the x,...
Definition nonlocal_maths.hpp:131
void cal_vkb_deri(int it, int ia, int npw, int ipol, int jpol, const FPTYPE *vq_in, const FPTYPE *vq_deri_in, const FPTYPE *ylm_in, const FPTYPE *ylm_deri_in, const std::complex< FPTYPE > *sk_in, const std::complex< FPTYPE > *pref_in, const FPTYPE *gk_in, std::complex< FPTYPE > *vkb_out)
calculate the dvkb matrix for this atom
Definition nonlocal_maths.hpp:261
Definition VNL_in_pw.h:21
int lmaxkb
Definition VNL_in_pw.h:35
ModuleBase::matrix nhtol
Definition VNL_in_pw.h:68
void ZEROS(std::complex< T > *u, const TI n)
Definition global_function.h:109
AbacusDevice_t
Definition types.h:12
@ GpuDevice
Definition types.h:15
Definition hamilt.h:12
double norm(const Vec3 &v)
Definition test_partition.cpp:25