ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
pw_basis.h
Go to the documentation of this file.
1#ifndef PWBASIS_H
2#define PWBASIS_H
3
9#include <complex>
11#include <cstring>
12#ifdef __MPI
13#include "mpi.h"
14#endif
15
16namespace ModulePW
17{
18
56{
57
58public:
59 std::string classname;
60 PW_Basis();
61 PW_Basis(std::string device_, std::string precision_);
62 virtual ~PW_Basis();
63 //Init mpi parameters
64#ifdef __MPI
65 void initmpi(
66 const int poolnproc_in, // Number of processors in this pool
67 const int poolrank_in, // Rank in this pool
68 MPI_Comm pool_world_in //Comm world for pw_basis
69 );
70#endif
71
72 //Init the grids for FFT
73 virtual void initgrids(
74 const double lat0_in, //unit length (unit in bohr)
75 const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors (unit in lat0)
76 const double gridecut //unit in Ry, ecut to set up grids
77 );
78 //Init the grids for FFT
79 virtual void initgrids(
80 const double lat0_in,
81 const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors
82 const int nx_in, int ny_in, int nz_in
83 );
84
85 //Init some parameters
86 void initparameters(
87 const bool gamma_only_in,
88 const double pwecut_in, //unit in Ry, ecut to decides plane waves
89 const int distribution_type_in = 1,
90 const bool xprime_in = true
91 );
92
93 //Set parameters about full planewave, only used in OFDFT for now.
94 void setfullpw(
95 const bool inpt_full_pw = false,
96 const int inpt_full_pw_dim = 0
97 );
98//===============================================
99// distribution maps
100//===============================================
101public:
102#ifdef __MPI
103 MPI_Comm pool_world=MPI_COMM_NULL;
104#endif
105
106 int *ig2isz=nullptr; // map ig to (is, iz).
107 int *ig2ixyz_gpu = nullptr;
108 int *istot2ixy=nullptr; // istot2ixy[is]: iy + ix * ny of is^th stick among all sticks.
109 int *is2fftixy=nullptr, * d_is2fftixy = nullptr; // is2fftixy[is]: iy + ix * ny of is^th stick among sticks on current proc.
110 int *fftixy2ip=nullptr; // fftixy2ip[iy + ix * fftny]: ip of proc which contains stick on (ix, iy). if no stick: -1
111 int nst=0; //num. of sticks in current proc.
112 int *nst_per=nullptr;// nst on each core
113 int nstnz=0; // nst * nz
114 int nstot=0; //num. of sticks in total.
115 int npw=0; //num. of plane waves in current proc.
116 int *npw_per=nullptr; //npw on each core
117 int npwtot=0; // total num. of plane waves in all proc. in this pool
118
119 //real space
120 int nrxx=0; //num. of real space grids
121 int *startz=nullptr; //startz[ip]: starting z plane in the ip-th proc. in current POOL_WORLD
122 int *numz=nullptr; //numz[ip]: num. of z planes in the ip-th proc. in current POOL_WORLD
123 int *numg=nullptr; //numg[ip] : nst_per[poolrank] * numz[ip]
124 int *numr=nullptr; //numr[ip] : numz[poolrank] * nst_per[ip]
125 int *startg=nullptr; // startg[ip] = numg[ip-1] + startg[ip-1]
126 int *startr=nullptr; // startr[ip] = numr[ip-1] + startr[ip-1]
128 int nplane=0; //num. of planes in current proc.
129
130 ModuleBase::Vector3<double> *gdirect=nullptr; //(= *G1d) ; // ig = new Vector igc[npw]
131 ModuleBase::Vector3<double> *gcar=nullptr; //G vectors in cartesian corrdinate
132 double *gg=nullptr; // modulus (G^2) of G vectors [npw]
133 //gg[ng]=ig[ng]*GGT*ig[ng]/(lat0*lat0)=g[ng]*g[ng] (/lat0*lat0)
134 // gg_global dimension: [cutgg_num_now] (save memory skill is used)
135 int ig_gge0=-1; //ig when gg == 0
136
137 //distribute plane waves and grids and set up fft
138 void setuptransform();
139
140protected:
141 int *startnsz_per=nullptr;//useless intermediate variable// startnsz_per[ip]: starting is * nz stick in the ip^th proc.
142
143 //distribute plane waves to different processors
144 void distribute_g();
145
146 //distribute real-space grids to different processors
147 virtual void distribute_r();
148
149 //prepare for MPI_Alltoall
150 void getstartgr();
151
152
153public:
154 //collect gdirect, gcar, gg
155 void collect_local_pw();
156
157public:
158 int ngg=0; //number of different modulus (G^2) of G vectors
159 int *ig2igg=nullptr;//[npw] map ig to igg(<ngg: the index of G^2)
160 double *gg_uniq=nullptr; //[ngg] modulus (G^2) of G vectors of igg, each gg of igg is unique.
161 //collect gg_uniq
162 void collect_uniqgg();
163
164
165public:
166 bool gamma_only = false;
167 bool full_pw = false;
169 double ggecut = 0.0;
170 double gridecut_lat = 0.0;
171 double lat0 = 1;
172 double tpiba = 1;
173 double tpiba2 = 1;
178 double omega = 1.0;
180 int full_pw_dim = 0;
182 int poolnproc = 1;
183 int poolrank = 0;
184
185protected:
186 //distribute plane waves to different processors
187 //method 1: first consider number of plane waves
189 // Distribute sticks to cores in method 1.
190 void divide_sticks_1(
191 int* st_i, // x or x + fftnx (if x < 0) of stick.
192 int* st_j, // y or y + fftny (if y < 0) of stick.
193 int* st_length // the stick on (ix, iy) consists of st_length[ix*fftny+iy] planewaves.
194 );
195
196 //method 2: first consider number of sticks
198 // Distribute sticks to cores in method 2.
199 void divide_sticks_2();
200
201 //Count the total number of planewaves (tot_npw) and sticks (this->nstot) (in distributeg method1 and method2)
202 void count_pw_st(
203 int* st_length2D, // the number of planewaves that belong to the stick located on (x, y).
204 int* st_bottom2D // the z-coordinate of the bottom of stick on (x, y).
205 );
206
207 //get ig2isz and is2fftixy
209 int* st_bottom, // minimum z of stick, stored in 1d array with tot_nst elements.
210 int* st_length // the stick on (x, y) consists of st_length[x*fftny+y] planewaves.
211 );
212
213 //Collect the x, y indexs, length of the sticks (in distributeg method1)
214 void collect_st(
215 int* st_length2D, // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane.
216 int* st_bottom2D, // the z-coordinate of the bottom of stick on (x, y), stored in 2d x-y plane.
217 int* st_i, // x or x + fftnx (if x < 0) of stick.
218 int* st_j, // y or y + fftny (if y < 0) of stick.
219 int* st_length // number of planewaves in stick, stored in 1d array with tot_nst elements.
220 );
221
222 //get istot2ixy
223 void get_istot2ixy(
224 int* st_i, // x or x + fftnx (if x < 0) of stick.
225 int* st_j // y or y + fftny (if y < 0) of stick.
226 );
227
228 //Create the maps from ixy to (in method 2)
229 void create_maps(
230 int* st_length2D // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane.
231 );
232
233//===============================================
234// FFT
235//===============================================
236public:
237 // FFT dimensions for wave functions.
238 int fftnx=0, fftny=0, fftnz=0, fftnxyz=0, fftnxy=0;
239 int nx=0, ny=0, nz=0, nxyz=0, nxy=0; // Gamma_only: fftny = int(ny/2)-1 , others: fftny = ny
240 int liy=0, riy=0;// liy: the left edge of the pw ball; riy: the right edge of the pw ball in the y direction
241 int lix=0, rix=0;// lix: the left edge of the pw ball; rix: the right edge of the pw ball in the x direction
242 bool xprime = true; // true: when do recip2real, x-fft will be done last and when doing real2recip, x-fft will be
243 // done first; false: y-fft For gamma_only, true: we use half x; false: we use half y
244 int ng_xeq0 = 0; //only used when xprime = true, number of g whose gx = 0
245 int nmaxgr = 0; // Gamma_only: max between npw and (nrxx+1)/2, others: max between npw and nrxx
246 // Thus std::complex<double>[nmaxgr] is able to contain either reciprocal or real data
247 // FFT ft;
249 //The position of pointer in and out can be equal(in-place transform) or different(out-of-place transform).
250
251 template <typename FPTYPE>
252 void real2recip(const FPTYPE* in,
253 std::complex<FPTYPE>* out,
254 const bool add = false,
255 const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns)
256 template <typename FPTYPE>
257 void real2recip(const std::complex<FPTYPE>* in,
258 std::complex<FPTYPE>* out,
259 const bool add = false,
260 const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns)
261 template <typename FPTYPE>
262 void recip2real(const std::complex<FPTYPE>* in,
263 FPTYPE* out,
264 const bool add = false,
265 const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny)
266 template <typename FPTYPE>
267 void recip2real(const std::complex<FPTYPE>* in,
268 std::complex<FPTYPE>* out,
269 const bool add = false,
270 const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny)
271
272 template <typename FPTYPE>
273 void real2recip_gpu(const FPTYPE* in,
274 std::complex<FPTYPE>* out,
275 const bool add = false,
276 const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns)
277 template <typename FPTYPE>
278 void real2recip_gpu(const std::complex<FPTYPE>* in,
279 std::complex<FPTYPE>* out,
280 const bool add = false,
281 const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns)
282 template <typename FPTYPE>
283 void recip2real_gpu(const std::complex<FPTYPE>* in,
284 FPTYPE* out,
285 const bool add = false,
286 const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny)
287 template <typename FPTYPE>
288 void recip2real_gpu(const std::complex<FPTYPE>* in,
289 std::complex<FPTYPE>* out,
290 const bool add = false,
291 const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny)
292
313 template <typename TK,
314 typename TR,
315 typename Device,
316 typename std::enable_if<!std::is_same<TK, typename GetTypeReal<TK>::type>::value
317 && (std::is_same<TR, typename GetTypeReal<TK>::type>::value
318 || std::is_same<TR, TK>::value)
319 && std::is_same<Device, base_device::DEVICE_CPU>::value,
320 int>::type
321 = 0>
322 void recip_to_real(TK* in, TR* out, const bool add = false, const typename GetTypeReal<TK>::type factor = 1.0) const
323 {
324 this->recip2real(in, out, add, factor);
325 };
326
344 template <typename TK,
345 typename TR,
346 typename Device,
347 typename std::enable_if<!std::is_same<TK, typename GetTypeReal<TK>::type>::value
348 && (std::is_same<TR, typename GetTypeReal<TK>::type>::value
349 || std::is_same<TR, TK>::value)
350 && std::is_same<Device, base_device::DEVICE_GPU>::value,
351 int>::type
352 = 0>
353 void recip_to_real(TK* in,
354 TR* out,
355 const bool add = false,
356 const typename GetTypeReal<TK>::type factor = 1.0) const
357 {
358 this->recip2real_gpu(in,out,add,factor);
359 };
360
361 // template <typename FPTYPE,
362 // typename Device,
363 // typename std::enable_if<!std::is_same<FPTYPE, typename GetTypeReal<FPTYPE>::type>::value, int>::type = 0>
364 // void recip_to_real(FPTYPE* in,
365 // FPTYPE* out,
366 // const bool add = false,
367 // const typename GetTypeReal<FPTYPE>::type factor = 1.0) const;
390 template <typename TR,typename TK,typename Device,
391 typename std::enable_if<!std::is_same<TK, typename GetTypeReal<TK>::type>::value
392 && (std::is_same<TR, typename GetTypeReal<TK>::type>::value || std::is_same<TR, TK>::value)
393 && std::is_same<Device, base_device::DEVICE_CPU>::value ,int>::type = 0>
394 void real_to_recip(TR* in,
395 TK* out,
396 const bool add = false,
397 const typename GetTypeReal<TK>::type factor = 1.0) const
398 {
399 this->real2recip(in, out, add, factor);
400 }
401
402 template <typename TR, typename TK, typename Device,
403 typename std::enable_if<!std::is_same<TK, typename GetTypeReal<TK>::type>::value
404 && (std::is_same<TR, typename GetTypeReal<TK>::type>::value || std::is_same<TR, TK>::value)
405 && std::is_same<Device, base_device::DEVICE_GPU>::value ,int>::type = 0>
406 void real_to_recip(TR* in,
407 TK* out,
408 const bool add = false,
409 const typename GetTypeReal<TK>::type factor = 1.0) const
410 {
411 this->real2recip_gpu(in,out,add,factor);
412 };
413
414 protected:
415 //gather planes and scatter sticks of all processors
416 template <typename T>
417 void gatherp_scatters(std::complex<T>* in, std::complex<T>* out) const;
418
419 // gather sticks of and scatter planes of all processors
420 template <typename T>
421 void gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const;
422
423 public:
424 //get fftixy2is;
425 void getfftixy2is(int * fftixy2is) const;
426
430 // using default_device_cpu = base_device::DEVICE_CPU;
431
432 void set_device(std::string device_);
433 void set_precision(std::string precision_);
434
435 std::string get_device() const { return device; }
436 std::string get_precision() const { return precision; }
437
438protected:
439
440 std::string device = "cpu";
441 std::string precision = "double";
442 bool double_data_ = true;
443 bool float_data_ = false;
444};
445}
446#endif // PWBASIS_H
447#include "pw_basis_sup.h"
448#include "pw_basis_big.h" //temporary it will be removed
3x3 matrix and related mathamatical operations
Definition matrix3.h:19
3 elements vector
Definition vector3.h:22
Definition fft_bundle.h:11
A class which can convert a function of "r" to the corresponding linear superposition of plane waves ...
Definition pw_basis.h:56
void recip2real_gpu(const std::complex< FPTYPE > *in, std::complex< FPTYPE > *out, const bool add=false, const FPTYPE factor=1.0) const
int nrxx
Definition pw_basis.h:120
void getfftixy2is(int *fftixy2is) const
Definition pw_basis.cpp:274
int nst
Definition pw_basis.h:111
int nmaxgr
Definition pw_basis.h:245
void setfullpw(const bool inpt_full_pw=false, const int inpt_full_pw_dim=0)
Definition pw_init.cpp:246
int nstot
Definition pw_basis.h:114
double omega
volume of the cell
Definition pw_basis.h:178
void real2recip(const FPTYPE *in, std::complex< FPTYPE > *out, const bool add=false, const FPTYPE factor=1.0) const
transform real space to reciprocal space
Definition pw_transform.cpp:79
void real_to_recip(TR *in, TK *out, const bool add=false, const typename GetTypeReal< TK >::type factor=1.0) const
Converts data from real space to reciprocal space (Fourier space).
Definition pw_basis.h:394
int riy
Definition pw_basis.h:240
int lix
Definition pw_basis.h:241
bool xprime
Definition pw_basis.h:242
int nx
Definition pw_basis.h:239
int * numz
Definition pw_basis.h:122
std::string precision
single, double, mixing
Definition pw_basis.h:441
void set_device(std::string device_)
Definition pw_basis.cpp:296
ModuleBase::Matrix3 GT
traspose of G
Definition pw_basis.h:176
int nxy
Definition pw_basis.h:239
double tpiba
2pi/lat0
Definition pw_basis.h:172
int * ig2ixyz_gpu
Definition pw_basis.h:107
int liy
Definition pw_basis.h:240
int poolnproc
Definition pw_basis.h:182
void gatherp_scatters(std::complex< T > *in, std::complex< T > *out) const
gather planes and scatter sticks
Definition pw_gatherscatter.h:15
int * startr
Definition pw_basis.h:126
void distribution_method1()
Definition pw_distributeg_method1.cpp:25
int rix
Definition pw_basis.h:241
int * startg
Definition pw_basis.h:125
bool double_data_
if has double data
Definition pw_basis.h:442
void count_pw_st(int *st_length2D, int *st_bottom2D)
(1) We count the total number of planewaves (tot_npw) and sticks (this->nstot) here.
Definition pw_distributeg.cpp:44
int npw
Definition pw_basis.h:115
bool gamma_only
only half g are used.
Definition pw_basis.h:166
std::string device
cpu or gpu
Definition pw_basis.h:440
double ggecut
Energy cut off for g^2/2 = ecutwfc(Ry)*lat0^2/4pi^2, unit in 1/lat0^2.
Definition pw_basis.h:169
int * d_is2fftixy
Definition pw_basis.h:109
double * gg_uniq
Definition pw_basis.h:160
PW_Basis()
Definition pw_basis.cpp:11
ModuleBase::Vector3< double > * gdirect
Definition pw_basis.h:130
int * startnsz_per
Definition pw_basis.h:141
void initmpi(const int poolnproc_in, const int poolrank_in, MPI_Comm pool_world_in)
Definition pw_init.cpp:7
int * numr
Definition pw_basis.h:124
void divide_sticks_1(int *st_i, int *st_j, int *st_length)
(3-1) Distribute sticks to cores according to the number of plane waves.
Definition pw_distributeg_method1.cpp:261
int * istot2ixy
Definition pw_basis.h:108
void create_maps(int *st_length2D)
(3) Create the maps from ixy to ip, istot, and from istot to ixy, and construt npw_per simultaneously...
Definition pw_distributeg_method2.cpp:142
int fftnxy
Definition pw_basis.h:238
int ny
Definition pw_basis.h:239
int fftny
Definition pw_basis.h:238
void divide_sticks_2()
(2) Devide the sticks to each core according to the number of sticks Sticks are in the order of ixy i...
Definition pw_distributeg_method2.cpp:120
int fftnz
Definition pw_basis.h:238
void distribution_method2()
Definition pw_distributeg_method2.cpp:25
void set_precision(std::string precision_)
Definition pw_basis.cpp:300
void get_ig2isz_is2fftixy(int *st_bottom, int *st_length)
(5) Construct ig2isz, and is2fftixy.
Definition pw_distributeg.cpp:156
int startz_current
Definition pw_basis.h:127
virtual void distribute_r()
distribute real-space grids to different processors
Definition pw_distributer.cpp:11
virtual void initgrids(const double lat0_in, const ModuleBase::Matrix3 latvec_in, const double gridecut)
Definition pw_init.cpp:23
void distribute_g()
distribute plane waves to different cores
Definition pw_distributeg.cpp:13
double lat0
unit length for lattice, unit in bohr
Definition pw_basis.h:171
int poolrank
Definition pw_basis.h:183
std::string classname
Definition pw_basis.h:59
int ngg
Definition pw_basis.h:158
std::string get_precision() const
Definition pw_basis.h:436
void initparameters(const bool gamma_only_in, const double pwecut_in, const int distribution_type_in=1, const bool xprime_in=true)
Definition pw_init.cpp:213
int * nst_per
Definition pw_basis.h:112
int ng_xeq0
Definition pw_basis.h:244
int fftnx
Definition pw_basis.h:238
int * npw_per
Definition pw_basis.h:116
int nplane
Definition pw_basis.h:128
std::string get_device() const
Definition pw_basis.h:435
void collect_uniqgg()
Definition pw_basis.cpp:193
int full_pw_dim
Definition pw_basis.h:180
void real2recip_gpu(const std::complex< FPTYPE > *in, std::complex< FPTYPE > *out, const bool add=false, const FPTYPE factor=1.0) const
void real2recip_gpu(const FPTYPE *in, std::complex< FPTYPE > *out, const bool add=false, const FPTYPE factor=1.0) const
FFT_Bundle fft_bundle
Definition pw_basis.h:248
int fftnxyz
Definition pw_basis.h:238
ModuleBase::Matrix3 latvec
Unitcell lattice vectors, unit in lat0.
Definition pw_basis.h:174
void recip2real_gpu(const std::complex< FPTYPE > *in, FPTYPE *out, const bool add=false, const FPTYPE factor=1.0) const
MPI_Comm pool_world
Definition pw_basis.h:103
ModuleBase::Matrix3 G
reciprocal lattice vector, unit in 1/lat0
Definition pw_basis.h:175
void recip2real(const std::complex< FPTYPE > *in, FPTYPE *out, const bool add=false, const FPTYPE factor=1.0) const
transform reciprocal space to real space
Definition pw_transform.cpp:205
void gathers_scatterp(std::complex< T > *in, std::complex< T > *out) const
gather sticks and scatter planes
Definition pw_gatherscatter.h:99
int nxyz
Definition pw_basis.h:239
int distribution_type
distribution method
Definition pw_basis.h:179
bool full_pw
Definition pw_basis.h:167
int * numg
Definition pw_basis.h:123
double tpiba2
4pi^2/lat0^2
Definition pw_basis.h:173
virtual ~PW_Basis()
Definition pw_basis.cpp:23
void get_istot2ixy(int *st_i, int *st_j)
(3-2) Rearrange sticks in the order of the ip of core increasing, in each core, sticks are sorted in ...
Definition pw_distributeg_method1.cpp:317
int * ig2igg
Definition pw_basis.h:159
void collect_local_pw()
Definition pw_basis.cpp:135
double * gg
Definition pw_basis.h:132
int * is2fftixy
Definition pw_basis.h:109
int ig_gge0
Definition pw_basis.h:135
int nstnz
Definition pw_basis.h:113
int * startz
Definition pw_basis.h:121
ModuleBase::Matrix3 GGT
GGT = G*GT.
Definition pw_basis.h:177
bool float_data_
if has float data
Definition pw_basis.h:443
int * fftixy2ip
Definition pw_basis.h:110
void collect_st(int *st_length2D, int *st_bottom2D, int *st_i, int *st_j, int *st_length)
(2) Collect the x, y indexs, length of the sticks.
Definition pw_distributeg_method1.cpp:131
double gridecut_lat
Energy cut off for all fft grids = ecutrho(Ry)*lat0^2/4pi^2, unit in 1/lat0^2.
Definition pw_basis.h:170
int npwtot
Definition pw_basis.h:117
int * ig2isz
Definition pw_basis.h:106
int nz
Definition pw_basis.h:239
void setuptransform()
Definition pw_basis.cpp:56
void getstartgr()
Definition pw_basis.cpp:76
ModuleBase::Vector3< double > * gcar
Definition pw_basis.h:131
void recip_to_real(TK *in, TR *out, const bool add=false, const typename GetTypeReal< TK >::type factor=1.0) const
Converts data from reciprocal space to real space on Cpu.
Definition pw_basis.h:322
Definition pw_op.cpp:3
T type
Definition macros.h:8
Definition memory_op.h:77
Definition memory_op.h:17