ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
dftu_force_stress.hpp
Go to the documentation of this file.
1#pragma once
2#include "dftu_lcao.h"
4#include "source_base/timer.h"
5
6namespace hamilt
7{
8
9template <typename TK, typename TR>
10void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
11 const bool cal_stress,
12 ModuleBase::matrix& force,
13 ModuleBase::matrix& stress)
14{
15 ModuleBase::TITLE("DFTU", "cal_force_stress");
16 if (this->dftu->get_dmr(0) == nullptr)
17 {
18 ModuleBase::WARNING_QUIT("DFTU", "dmr is not set");
19 }
20
21 // try to get the density matrix, if the density matrix is empty, skip the calculation and return
22 const hamilt::HContainer<double>* dmR_tmp[this->nspin];
23 dmR_tmp[0] = this->dftu->get_dmr(0);
24
25 if (this->nspin == 2)
26 {
27 dmR_tmp[1] = this->dftu->get_dmr(1);
28 }
29 if (dmR_tmp[0]->size_atom_pairs() == 0)
30 {
31 return;
32 }
33
34 // begin the calculation of force and stress
35 ModuleBase::timer::tick("DFTU", "cal_force_stress");
36
37 const Parallel_Orbitals* paraV = dmR_tmp[0]->get_paraV();
38 const int npol = this->ucell->get_npol();
39 std::vector<double> stress_tmp(6, 0);
40 if (cal_force)
41 {
42 force.zero_out();
43 }
44 // calculate atom_index for adjs_all, induced by omp parallel
45 int atom_index = 0;
46 std::vector<int> atom_index_all(this->ucell->nat, -1);
47 for (int iat0 = 0; iat0 < this->ucell->nat; iat0++)
48 {
49 int T0=0;
50 int I0=0;
51 ucell->iat2iait(iat0, &I0, &T0);
52 if(this->dftu->orbital_corr[T0] == -1)
53 {
54 continue;
55 }
56 atom_index_all[iat0] = atom_index;
57 atom_index++;
58 }
59
60 // 1. calculate <psi|beta> for each pair of atoms
61 // loop over all on-site atoms
62 #pragma omp parallel
63 {
64 std::vector<double> stress_local(6, 0);
65 ModuleBase::matrix force_local(force.nr, force.nc);
66 #pragma omp for schedule(dynamic)
67 for (int iat0 = 0; iat0 < this->ucell->nat; iat0++)
68 {
69 // skip the atoms without plus-U
70 auto tau0 = ucell->get_tau(iat0);
71 int T0=0;
72 int I0=0;
73 ucell->iat2iait(iat0, &I0, &T0);
74 const int target_L = this->dftu->orbital_corr[T0];
75 if (target_L == -1)
76 {
77 continue;
78 }
79 const int tlp1 = 2 * target_L + 1;
80 AdjacentAtomInfo& adjs = this->adjs_all[atom_index_all[iat0]];
81
82 std::vector<std::unordered_map<int, std::vector<double>>> nlm_tot;
83 nlm_tot.resize(adjs.adj_num + 1);
84
85 for (int ad = 0; ad < adjs.adj_num + 1; ++ad)
86 {
87 const int T1 = adjs.ntype[ad];
88 const int I1 = adjs.natom[ad];
89 const int iat1 = ucell->itia2iat(T1, I1);
90 const ModuleBase::Vector3<double>& tau1 = adjs.adjacent_tau[ad];
91 const Atom* atom1 = &ucell->atoms[T1];
92
93 auto all_indexes = paraV->get_indexes_row(iat1);
94 auto col_indexes = paraV->get_indexes_col(iat1);
95 // insert col_indexes into all_indexes to get universal set with no repeat elements
96 all_indexes.insert(all_indexes.end(), col_indexes.begin(), col_indexes.end());
97 std::sort(all_indexes.begin(), all_indexes.end());
98 all_indexes.erase(std::unique(all_indexes.begin(), all_indexes.end()), all_indexes.end());
99 for (int iw1l = 0; iw1l < all_indexes.size(); iw1l += npol)
100 {
101 const int iw1 = all_indexes[iw1l] / npol;
102 std::vector<std::vector<double>> nlm;
103 // nlm is a vector of vectors, but size of outer vector is only 1 here
104 // If we are calculating force, we need also to store the gradient
105 // and size of outer vector is then 4
106 // inner loop : all projectors (L0,M0)
107 int L1 = atom1->iw2l[iw1];
108 int N1 = atom1->iw2n[iw1];
109 int m1 = atom1->iw2m[iw1];
110
111 // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l)
112 int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2;
113
114 ModuleBase::Vector3<double> dtau = tau0 - tau1;
115 intor_->snap(T1, L1, N1, M1, T0, dtau * this->ucell->lat0, 1 /*cal_deri*/, nlm);
116
117 // select the elements of nlm with target_L
118 std::vector<double> nlm_target(tlp1 * 4);
119 for (int iw = 0; iw < this->ucell->atoms[T0].nw; iw++)
120 {
121 const int L0 = this->ucell->atoms[T0].iw2l[iw];
122 if (L0 == target_L)
123 {
124 for (int m = 0; m < tlp1; m++) //-l, -l+1, ..., l-1, l
125 {
126 for (int n = 0; n < 4; n++) // value, deri_x, deri_y, deri_z
127 {
128 nlm_target[m + n * tlp1] = nlm[n][iw + m];
129 // if(dtau.norm2 == 0.0) std::cout<<__FILE__<<__LINE__<<" "<<m<<" "<<n<<"
130 // "<<(m+n*tlp1)<<" "<<iw+m<<" "<<nlm[n][iw+m]<<" "<<nlm_target[m + n * tlp1] <<
131 // std::endl;
132 }
133 }
134 break;
135 }
136 }
137 nlm_tot[ad].insert({all_indexes[iw1l], nlm_target});
138 }
139 }
140 // first iteration to calculate occupation matrix
141 std::vector<double> occ(tlp1 * tlp1 * this->nspin, 0);
142 if(this->nspin ==2)
143 {
144 for (int i = 0; i < occ.size(); i++)
145 {
146 const int is = i / (tlp1 * tlp1);
147 const int ii = i % (tlp1 * tlp1);
148 occ[i] = this->dftu->locale[iat0][target_L][0][is].c[ii];
149 }
150 }
151 else
152 {
153 for (int i = 0; i < occ.size(); i++)
154 {
155 occ[i] = this->dftu->locale[iat0][target_L][0][0].c[i];
156 }
157 }
158
159 // calculate VU
160 const double u_value = this->dftu->U[T0];
161 std::vector<double> VU(occ.size());
162 double eu_tmp = 0;
163 this->cal_v_of_u(occ, tlp1, u_value, &VU[0], eu_tmp);
164 if(this->nspin == 4)
165 {
166 for (int i = 0; i < VU.size(); i++)
167 {
168 VU[i] /= 2.0;
169 }
170 }
171
172 // second iteration to calculate force and stress
173 // calculate Force for atom J
174 // DMR_{I,J,R'-R} * <phi_{I,R}|chi_m> U*(1/2*delta(m, m')-occ(m, m'))
175 // \frac{\partial <chi_m'|phi_{J,R'}>}{\partial \tau_J} for each pair of <IJR> atoms
176 // calculate Stress for strain tensor \varepsilon_{\alpha\beta}
177 // -1/Omega * DMR_{I,J,R'-R} * [ \frac{\partial <phi_{I,R}|chi_m>}{\partial \tau_{J,\alpha}}\tau_{J,\beta}
178 // U*(1/2*delta(m, m')-occ(m, m'))<chi_m'|phi_{J,R'}>
179 // + <phi_{I,R}|chi_m> U*(1/2*delta(m, m')-occ(m, m'))
180 // \frac{\partial <chi_m'|phi_{J,R'}>}{\partial \tau_{J,\alpha}}\tau_{J,\beta}] for each pair of <IJR> atoms
181 for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)
182 {
183 const int T1 = adjs.ntype[ad1];
184 const int I1 = adjs.natom[ad1];
185 const int iat1 = ucell->itia2iat(T1, I1);
186 double* force_tmp1 = (cal_force) ? &force_local(iat1, 0) : nullptr;
187 double* force_tmp2 = (cal_force) ? &force_local(iat0, 0) : nullptr;
188 ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
189 ModuleBase::Vector3<double> dis1 = adjs.adjacent_tau[ad1] - tau0;
190 for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2)
191 {
192 const int T2 = adjs.ntype[ad2];
193 const int I2 = adjs.natom[ad2];
194 const int iat2 = ucell->itia2iat(T2, I2);
195 ModuleBase::Vector3<int>& R_index2 = adjs.box[ad2];
196 ModuleBase::Vector3<double> dis2 = adjs.adjacent_tau[ad2] - tau0;
197 ModuleBase::Vector3<int> R_vector(R_index2[0] - R_index1[0],
198 R_index2[1] - R_index1[1],
199 R_index2[2] - R_index1[2]);
200 const hamilt::BaseMatrix<double>* tmp[this->nspin];
201 tmp[0] = dmR_tmp[0]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]);
202 if (this->nspin == 2)
203 {
204 tmp[1] = dmR_tmp[1]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]);
205 }
206 // if not found , skip this pair of atoms
207 if (tmp[0] != nullptr)
208 {
209 // calculate force
210 if (cal_force) {
211 this->cal_force_IJR(iat1,
212 iat2,
213 paraV,
214 nlm_tot[ad1],
215 nlm_tot[ad2],
216 VU,
217 tmp,
218 this->nspin,
219 force_tmp1,
220 force_tmp2);
221 }
222
223 // calculate stress
224 if (cal_stress) {
225 this->cal_stress_IJR(iat1,
226 iat2,
227 paraV,
228 nlm_tot[ad1],
229 nlm_tot[ad2],
230 VU,
231 tmp,
232 this->nspin,
233 dis1,
234 dis2,
235 stress_local.data());
236 }
237 }
238 }
239 }
240 }
241 #pragma omp critical
242 {
243 if(cal_force)
244 {
245 force += force_local;
246 }
247 if(cal_stress)
248 {
249 for(int i = 0; i < 6; i++)
250 {
251 stress_tmp[i] += stress_local[i];
252 }
253 }
254 }
255 }
256
257 if (cal_force)
258 {
259#ifdef __MPI
260 // sum up the occupation matrix
261 Parallel_Reduce::reduce_all(force.c, force.nr * force.nc);
262#endif
263 for (int i = 0; i < force.nr * force.nc; i++)
264 {
265 force.c[i] *= 2.0;
266 }
267 }
268
269 // stress renormalization
270 if (cal_stress)
271 {
272#ifdef __MPI
273 // sum up the occupation matrix
274 Parallel_Reduce::reduce_all(stress_tmp.data(), 6);
275#endif
276 const double weight = this->ucell->lat0 / this->ucell->omega;
277 for (int i = 0; i < 6; i++)
278 {
279 stress.c[i] = stress_tmp[i] * weight;
280 }
281 stress.c[8] = stress.c[5]; // stress(2,2)
282 stress.c[7] = stress.c[4]; // stress(2,1)
283 stress.c[6] = stress.c[2]; // stress(2,0)
284 stress.c[5] = stress.c[4]; // stress(1,2)
285 stress.c[4] = stress.c[3]; // stress(1,1)
286 stress.c[3] = stress.c[1]; // stress(1,0)
287 }
288
289 ModuleBase::timer::tick("DFTU", "cal_force_stress");
290}
291
292
293template <typename TK, typename TR>
294void DFTU<OperatorLCAO<TK, TR>>::cal_force_IJR(const int& iat1,
295 const int& iat2,
296 const Parallel_Orbitals* paraV,
297 const std::unordered_map<int, std::vector<double>>& nlm1_all,
298 const std::unordered_map<int, std::vector<double>>& nlm2_all,
299 const std::vector<double>& vu_in,
300 const hamilt::BaseMatrix<double>** dmR_pointer,
301 const int nspin,
302 double* force1,
303 double* force2)
304{
305 // npol is the number of polarizations,
306 // 1 for non-magnetic (one Hamiltonian matrix only has spin-up or spin-down),
307 // 2 for magnetic (one Hamiltonian matrix has both spin-up and spin-down)
308 const int npol = this->ucell->get_npol();
309 // ---------------------------------------------
310 // calculate the Nonlocal matrix for each pair of orbitals
311 // ---------------------------------------------
312 auto row_indexes = paraV->get_indexes_row(iat1);
313 auto col_indexes = paraV->get_indexes_col(iat2);
314 const int m_size = int(sqrt(vu_in.size() / nspin));
315 const int m_size2 = m_size * m_size;
316
317 // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4
318 std::vector<int> step_trace(npol * npol, 0);
319
320 if (npol == 2)
321 {
322 step_trace[1] = 1;
323 step_trace[2] = col_indexes.size();
324 step_trace[3] = col_indexes.size() + 1;
325 }
326
327 double tmp[3];
328 // calculate the local matrix
329 for (int is = 0; is < nspin; is++)
330 {
331 const int is0 = nspin==2 ? is : 0;
332 const int step_is = nspin==4 ? is : 0;
333 const double* dm_pointer = dmR_pointer[is0]->get_pointer();
334 for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
335 {
336 const std::vector<double>& nlm1 = nlm1_all.find(row_indexes[iw1l])->second;
337 for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol)
338 {
339 const std::vector<double>& nlm2 = nlm2_all.find(col_indexes[iw2l])->second;
340#ifdef __DEBUG
341 assert(nlm1.size() == nlm2.size());
342#endif
343 for (int m1 = 0; m1 < m_size; m1++)
344 {
345 for (int m2 = 0; m2 < m_size; m2++)
346 {
347 tmp[0] = vu_in[m1 * m_size + m2 + is * m_size2] * nlm1[m1 + m_size]
348 * nlm2[m2] * dm_pointer[step_trace[step_is]];
349 tmp[1] = vu_in[m1 * m_size + m2 + is * m_size2] * nlm1[m1 + m_size * 2]
350 * nlm2[m2] * dm_pointer[step_trace[step_is]];
351 tmp[2] = vu_in[m1 * m_size + m2 + is * m_size2] * nlm1[m1 + m_size * 3]
352 * nlm2[m2] * dm_pointer[step_trace[step_is]];
353 // force1 = - VU * <d phi_{I,R1}/d R1|chi_m> * <chi_m'|phi_{J,R2}>
354 // force2 = - VU * <phi_{I,R1}|d chi_m/d R0> * <chi_m'|phi_{J,R2>}
355 force1[0] += tmp[0];
356 force1[1] += tmp[1];
357 force1[2] += tmp[2];
358 force2[0] -= tmp[0];
359 force2[1] -= tmp[1];
360 force2[2] -= tmp[2];
361 }
362 }
363 dm_pointer += npol;
364 }
365 dm_pointer += (npol - 1) * col_indexes.size();
366 }
367 }
368}
369
370template <typename TK, typename TR>
371void DFTU<OperatorLCAO<TK, TR>>::cal_stress_IJR(const int& iat1,
372 const int& iat2,
373 const Parallel_Orbitals* paraV,
374 const std::unordered_map<int, std::vector<double>>& nlm1_all,
375 const std::unordered_map<int, std::vector<double>>& nlm2_all,
376 const std::vector<double>& vu_in,
377 const hamilt::BaseMatrix<double>** dmR_pointer,
378 const int nspin,
379 const ModuleBase::Vector3<double>& dis1,
380 const ModuleBase::Vector3<double>& dis2,
381 double* stress)
382{
383 // npol is the number of polarizations,
384 // 1 for non-magnetic (one Hamiltonian matrix only has spin-up or spin-down),
385 // 2 for magnetic (one Hamiltonian matrix has both spin-up and spin-down)
386 const int npol = this->ucell->get_npol();
387 // ---------------------------------------------
388 // calculate the Nonlocal matrix for each pair of orbitals
389 // ---------------------------------------------
390 auto row_indexes = paraV->get_indexes_row(iat1);
391 auto col_indexes = paraV->get_indexes_col(iat2);
392 const int m_size = int(sqrt(vu_in.size() / nspin));
393 const int m_size2 = m_size * m_size;
394
395 // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4
396 std::vector<int> step_trace(npol * npol, 0);
397
398 if (npol == 2)
399 {
400 step_trace[1] = 1;
401 step_trace[2] = col_indexes.size();
402 step_trace[3] = col_indexes.size() + 1;
403 }
404
405 // calculate the local matrix
406 for (int is = 0; is < nspin; is++)
407 {
408 const int is0 = nspin==2 ? is : 0;
409 const int step_is = nspin==4 ? is : 0;
410 const double* dm_pointer = dmR_pointer[is0]->get_pointer();
411 for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
412 {
413 const std::vector<double>& nlm1 = nlm1_all.find(row_indexes[iw1l])->second;
414 for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol)
415 {
416 const std::vector<double>& nlm2 = nlm2_all.find(col_indexes[iw2l])->second;
417#ifdef __DEBUG
418 assert(nlm1.size() == nlm2.size());
419#endif
420 for (int m1 = 0; m1 < m_size; m1++)
421 {
422 for (int m2 = 0; m2 < m_size; m2++)
423 {
424 double tmp = vu_in[m1 * m_size + m2 + is * m_size2] * dm_pointer[step_trace[step_is]];
425 // std::cout<<__FILE__<<__LINE__<<" "<<tmp<<" "<<m1<<" "<<m2<<" "<<nlm1[m1 + m_size * 2]<<"
426 // "<<nlm2[m2 + m_size * 2]<<" "<<dis1.y<<" "<<dis2.y<<std::endl;
427 stress[0] += tmp * (nlm1[m1 + m_size] * dis1.x * nlm2[m2]
428 + nlm1[m1] * nlm2[m2 + m_size] * dis2.x);
429 stress[1] += tmp * (nlm1[m1 + m_size] * dis1.y * nlm2[m2]
430 + nlm1[m1] * nlm2[m2 + m_size] * dis2.y);
431 stress[2] += tmp * (nlm1[m1 + m_size] * dis1.z * nlm2[m2]
432 + nlm1[m1] * nlm2[m2 + m_size] * dis2.z);
433
434 stress[3] += tmp * (nlm1[m1 + m_size * 2] * dis1.y * nlm2[m2]
435 + nlm1[m1] * nlm2[m2 + m_size * 2] * dis2.y);
436 stress[4] += tmp * (nlm1[m1 + m_size * 2] * dis1.z * nlm2[m2]
437 + nlm1[m1] * nlm2[m2 + m_size * 2] * dis2.z);
438 stress[5] += tmp * (nlm1[m1 + m_size * 3] * dis1.z * nlm2[m2]
439 + nlm1[m1] * nlm2[m2 + m_size * 3] * dis2.z);
440 }
441 }
442 dm_pointer += npol;
443 }
444 dm_pointer += (npol - 1) * col_indexes.size();
445 }
446 }
447}
448
449} // namespace hamilt
Definition sltk_grid_driver.h:20
int adj_num
Definition sltk_grid_driver.h:25
std::vector< ModuleBase::Vector3< double > > adjacent_tau
Definition sltk_grid_driver.h:28
std::vector< ModuleBase::Vector3< int > > box
Definition sltk_grid_driver.h:29
std::vector< int > ntype
Definition sltk_grid_driver.h:26
std::vector< int > natom
Definition sltk_grid_driver.h:27
Definition atom_spec.h:7
std::vector< int > iw2m
Definition atom_spec.h:18
std::vector< int > iw2l
Definition atom_spec.h:20
std::vector< int > iw2n
Definition atom_spec.h:19
3 elements vector
Definition vector3.h:22
T x
Definition vector3.h:24
T y
Definition vector3.h:25
T z
Definition vector3.h:26
Definition matrix.h:19
double * c
Definition matrix.h:25
static void tick(const std::string &class_name_in, const std::string &name_in)
Use twice at a time: the first time, set start_flag to false; the second time, calculate the time dur...
Definition timer.cpp:57
std::vector< std::vector< std::vector< std::vector< ModuleBase::matrix > > > > locale
Definition dftu.h:136
std::vector< double > U
Definition dftu.h:55
std::vector< int > orbital_corr
Definition dftu.h:57
Definition parallel_orbitals.h:9
std::vector< int > get_indexes_col() const
Definition parallel_orbitals.cpp:154
std::vector< int > get_indexes_row() const
gather global indexes of orbitals in this processor get_indexes_row() : global indexes (~NLOCAL) of r...
Definition parallel_orbitals.cpp:140
Definition base_matrix.h:20
T * get_pointer() const
get pointer of value from a submatrix
Definition base_matrix.h:86
Definition dftu.hpp:13
Definition hcontainer.h:144
BaseMatrix< T > * find_matrix(int i, int j, int rx, int ry, int rz)
find BaseMatrix with atom index atom_i and atom_j and R index (rx, ry, rz) This interface can be used...
Definition hcontainer.cpp:261
const Parallel_Orbitals * get_paraV() const
get parallel orbital pointer to check parallel information
Definition hcontainer.h:192
void WARNING_QUIT(const std::string &, const std::string &)
Combine the functions of WARNING and QUIT.
Definition test_delley.cpp:14
void TITLE(const std::string &class_name, const std::string &function_name, const bool disable)
Definition tool_title.cpp:18
void reduce_all(T &object)
reduce in all process
Definition depend_mock.cpp:14
Definition hamilt.h:12