ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
pw_gatherscatter.h
Go to the documentation of this file.
1#include "pw_basis.h"
3#include "source_base/timer.h"
4#include "typeinfo"
5namespace ModulePW
6{
14template <typename T>
15void PW_Basis::gatherp_scatters(std::complex<T>* in, std::complex<T>* out) const
16{
17 //ModuleBase::timer::tick(this->classname, "gatherp_scatters");
18
19 if(this->poolnproc == 1) //In this case nst=nstot, nz = nplane,
20 {
21#ifdef _OPENMP
22#pragma omp parallel for
23#endif
24 for(int is = 0 ; is < this->nst ; ++is)
25 {
26 int ixy = this->istot2ixy[is];
27 //int ixy = (ixy / fftny)*ny + ixy % fftny;
28 std::complex<T> *outp = &out[is*nz];
29 std::complex<T> *inp = &in[ixy*nz];
30 for(int iz = 0 ; iz < this->nz ; ++iz)
31 {
32 outp[iz] = inp[iz];
33 }
34 }
35 //ModuleBase::timer::tick(this->classname, "gatherp_scatters");
36 return;
37 }
38#ifdef __MPI
39 //change (nplane fftnxy) to (nplane,nstot)
40 // Hence, we can send them at one time.
41#ifdef _OPENMP
42#pragma omp parallel for
43#endif
44 for (int istot = 0;istot < nstot; ++istot)
45 {
46 int ixy = this->istot2ixy[istot];
47 //int ixy = (ixy / fftny)*ny + ixy % fftny;
48 std::complex<T> *outp = &out[istot*nplane];
49 std::complex<T> *inp = &in[ixy*nplane];
50 for (int iz = 0; iz < nplane; ++iz)
51 {
52 outp[iz] = inp[iz];
53 }
54 }
55
56 //exchange data
57 //(nplane,nstot) to (numz[ip],ns, poolnproc)
58 if(typeid(T) == typeid(double))
59 {
60 MPI_Alltoallv(out, numr, startr, MPI_DOUBLE_COMPLEX, in, numg, startg, MPI_DOUBLE_COMPLEX, this->pool_world);
61 }
62 else if(typeid(T) == typeid(float))
63 {
64 MPI_Alltoallv(out, numr, startr, MPI_COMPLEX, in, numg, startg, MPI_COMPLEX, this->pool_world);
65 }
66
67 // change (nz,ns) to (numz[ip],ns, poolnproc)
68#ifdef _OPENMP
69#pragma omp parallel for collapse(2)
70#endif
71 for (int ip = 0; ip < this->poolnproc ;++ip)
72 {
73 for (int is = 0; is < this->nst; ++is)
74 {
75 int nzip = this->numz[ip];
76 std::complex<T> *outp0 = &out[startz[ip]];
77 std::complex<T> *inp0 = &in[startg[ip]];
78 std::complex<T> *outp = &outp0[is * nz];
79 std::complex<T> *inp = &inp0[is * nzip ];
80 for (int izip = 0; izip < nzip; ++izip)
81 {
82 outp[izip] = inp[izip];
83 }
84 }
85 }
86#endif
87 //ModuleBase::timer::tick(this->classname, "gatherp_scatters");
88 return;
89}
90
98template <typename T>
99void PW_Basis::gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const
100{
101 // ModuleBase::timer::tick(this->classname, "gathers_scatterp");
102 if(this->poolnproc == 1) //In this case nrxx=fftnx*fftny*nz, nst = nstot,
103 {
104#ifdef _OPENMP
105#pragma omp parallel for schedule(static, 4096/sizeof(T))
106#endif
107 for(int i = 0; i < this->nrxx; ++i)
108 {
109 out[i] = std::complex<T>(0, 0);
110 }
111
112#ifdef _OPENMP
113#pragma omp parallel for
114#endif
115 for(int is = 0 ; is < this->nst ; ++is)
116 {
117 int ixy = istot2ixy[is];
118 //int ixy = (ixy / fftny)*ny + ixy % fftny;
119 std::complex<T> *outp = &out[ixy*nz];
120 std::complex<T> *inp = &in[is*nz];
121 for(int iz = 0 ; iz < this->nz ; ++iz)
122 {
123 outp[iz] = inp[iz];
124 }
125 }
126 // ModuleBase::timer::tick(this->classname, "gathers_scatterp");
127 return;
128 }
129#ifdef __MPI
130 // change (nz,ns) to (numz[ip],ns, poolnproc)
131 // Hence, we can send them at one time.
132#ifdef _OPENMP
133#pragma omp parallel for collapse(2)
134#endif
135 for (int ip = 0; ip < this->poolnproc ;++ip)
136 {
137 for (int is = 0; is < this->nst; ++is)
138 {
139 int nzip = this->numz[ip];
140 std::complex<T> *outp0 = &out[startg[ip]];
141 std::complex<T> *inp0 = &in[startz[ip]];
142 std::complex<T> *outp = &outp0[is * nzip];
143 std::complex<T> *inp = &inp0[is * nz ];
144 for (int izip = 0; izip < nzip; ++izip)
145 {
146 outp[izip] = inp[izip];
147 }
148 }
149 }
150
151 //exchange data
152 //(numz[ip],ns, poolnproc) to (nplane,nstot)
153 if(typeid(T) == typeid(double))
154 {
155 MPI_Alltoallv(out, numg, startg, MPI_DOUBLE_COMPLEX, in, numr, startr, MPI_DOUBLE_COMPLEX, this->pool_world);
156 }
157 else if(typeid(T) == typeid(float))
158 {
159 MPI_Alltoallv(out, numg, startg, MPI_COMPLEX, in, numr, startr, MPI_COMPLEX, this->pool_world);
160 }
161
162#ifdef _OPENMP
163#pragma omp parallel for schedule(static, 4096/sizeof(T))
164#endif
165 for(int i = 0; i < this->nrxx; ++i)
166 {
167 out[i] = std::complex<T>(0, 0);
168 }
169 //change (nplane,nstot) to (nplane fftnxy)
170#ifdef _OPENMP
171#pragma omp parallel for
172#endif
173 for (int istot = 0;istot < nstot; ++istot)
174 {
175 int ixy = this->istot2ixy[istot];
176 //int ixy = (ixy / fftny)*ny + ixy % fftny;
177 std::complex<T> *outp = &out[ixy * nplane];
178 std::complex<T> *inp = &in[istot * nplane];
179 for (int iz = 0; iz < nplane; ++iz)
180 {
181 outp[iz] = inp[iz];
182 }
183 }
184#endif
185 // ModuleBase::timer::tick(this->classname, "gathers_scatterp");
186 return;
187}
188
189
190
191}
int nrxx
Definition pw_basis.h:120
int nst
Definition pw_basis.h:111
int nstot
Definition pw_basis.h:114
int * numz
Definition pw_basis.h:122
int poolnproc
Definition pw_basis.h:182
void gatherp_scatters(std::complex< T > *in, std::complex< T > *out) const
gather planes and scatter sticks
Definition pw_gatherscatter.h:15
int * startr
Definition pw_basis.h:126
int * startg
Definition pw_basis.h:125
int * numr
Definition pw_basis.h:124
int * istot2ixy
Definition pw_basis.h:108
int nplane
Definition pw_basis.h:128
MPI_Comm pool_world
Definition pw_basis.h:103
void gathers_scatterp(std::complex< T > *in, std::complex< T > *out) const
gather sticks and scatter planes
Definition pw_gatherscatter.h:99
int * numg
Definition pw_basis.h:123
int * startz
Definition pw_basis.h:121
int nz
Definition pw_basis.h:239
#define T
Definition exp.cpp:237
Definition pw_op.cpp:3