ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
phi_operator_gpu.h
Go to the documentation of this file.
1#pragma once
2#include <memory>
3#include <cuda_runtime.h>
4
6#include "gint_gpu_vars.h"
7#include "cuda_mem_wrapper.h"
8
9namespace ModuleGint
10{
11
12template<typename Real = double>
14{
15
16public:
17 PhiOperatorGpu(std::shared_ptr<const GintGpuVars> gint_gpu_vars, cudaStream_t stream = 0);
19
20 void set_bgrid_batch(std::shared_ptr<BatchBigGrid> bgrid_batch);
21
22 void set_phi(Real* phi_d) const;
23
24 // These remain double-only (for force/stress paths)
25 void set_phi_dphi(double* phi_d, double* dphi_x_d, double* dphi_y_d, double* dphi_z_d) const;
26
27 void set_ddphi(double* ddphi_xx_d, double* ddphi_xy_d, double* ddphi_xz_d,
28 double* ddphi_yy_d, double* ddphi_yz_d, double* ddphi_zz_d) const;
29
31 const Real* vl_d,
32 const Real dr3,
33 const Real* phi_d,
34 Real* result_d) const;
35
36 // All GEMM accumulators (hr in phi_mul_phi, phi_dm in phi_mul_dm) are
37 // double-typed regardless of Real: when Real=float the multiplies stay in
38 // fp32 (cheap) but per-block reductions and device-side atomicAdd run in
39 // fp64 so the global reductions don't drift.
41 const Real* phi_d,
42 const Real* phi_vldr3_d,
43 HContainer<double>& hRGint,
44 double* hr_d) const;
45
47 const Real* phi_d,
48 const Real* dm_d,
49 const HContainer<Real>& dm,
50 const bool is_symm,
51 double* phi_dm_d);
52
53 // phi_j_d is the output of phi_mul_dm and therefore always double.
55 const Real* phi_i_d,
56 const double* phi_j_d,
57 double* rho_d) const;
58
59 // These remain double-only (for force/stress paths)
61 const double* phi_d,
62 const double* dphi_x_d,
63 const double* dphi_y_d,
64 const double* dphi_z_d,
65 double* fvl_d) const;
66
68 const double* phi_d,
69 const double* dphi_x_d,
70 const double* dphi_y_d,
71 const double* dphi_z_d,
72 double* svl_d) const;
73
74private:
75 std::shared_ptr<BatchBigGrid> bgrid_batch_;
76 std::shared_ptr<const GintGpuVars> gint_gpu_vars_;
77
78 // the number of meshgrids on a biggrid
80
82
83 cudaStream_t stream_ = 0;
84 cudaEvent_t event_;
85
86 // Numbers are stored in pairs, one pair per bigcell: the first number of each pair is the number of atoms on that bigcell,
87 // and the second number is the cumulative number of atoms up to that bigcell.
89
90 // the iat of each atom
92
93 // atoms_bgrids_rcoords_ here represents the relative coordinates from the big grid to the atoms
95
96 // the start index of the phi array for each atom
98 // The length of phi for a single meshgrid on each big grid.
100 // The start index of the phi array for each big grid.
102 // Maps the index of each meshgrid within the batch of biggrids to the index of that meshgrid in the local cell.
104
113 // Single C-pointer buffer: both phi_mul_phi (output hr) and phi_mul_dm
114 // (output phi_dm) write into double* accumulators, so a single shared
115 // gemm_C_ device buffer can serve both call sites.
118};
119
120}
Definition cuda_mem_wrapper.h:8
Definition phi_operator_gpu.h:14
std::shared_ptr< BatchBigGrid > bgrid_batch_
Definition phi_operator_gpu.h:75
CudaMemWrapper< int > gemm_ldb_
Definition phi_operator_gpu.h:109
CudaMemWrapper< int > atoms_iat_
Definition phi_operator_gpu.h:91
CudaMemWrapper< int > gemm_m_
Definition phi_operator_gpu.h:105
PhiOperatorGpu(std::shared_ptr< const GintGpuVars > gint_gpu_vars, cudaStream_t stream=0)
void set_phi(Real *phi_d) const
CudaMemWrapper< int > gemm_ldc_
Definition phi_operator_gpu.h:110
std::shared_ptr< const GintGpuVars > gint_gpu_vars_
Definition phi_operator_gpu.h:76
void phi_dot_dphi_r(const double *phi_d, const double *dphi_x_d, const double *dphi_y_d, const double *dphi_z_d, double *svl_d) const
void phi_mul_dm(const Real *phi_d, const Real *dm_d, const HContainer< Real > &dm, const bool is_symm, double *phi_dm_d)
CudaMemWrapper< int > gemm_k_
Definition phi_operator_gpu.h:107
CudaMemWrapper< double3 > atoms_bgrids_rcoords_
Definition phi_operator_gpu.h:94
void phi_mul_vldr3(const Real *vl_d, const Real dr3, const Real *phi_d, Real *result_d) const
int phi_len_
Definition phi_operator_gpu.h:81
void set_phi_dphi(double *phi_d, double *dphi_x_d, double *dphi_y_d, double *dphi_z_d) const
CudaMemWrapper< Real > gemm_alpha_
Definition phi_operator_gpu.h:117
void phi_mul_phi(const Real *phi_d, const Real *phi_vldr3_d, HContainer< double > &hRGint, double *hr_d) const
cudaStream_t stream_
Definition phi_operator_gpu.h:83
CudaMemWrapper< int > gemm_n_
Definition phi_operator_gpu.h:106
cudaEvent_t event_
Definition phi_operator_gpu.h:84
CudaMemWrapper< int > batch_mgrid_lidx_
Definition phi_operator_gpu.h:103
int mgrids_num_
Definition phi_operator_gpu.h:79
void phi_dot_dphi(const double *phi_d, const double *dphi_x_d, const double *dphi_y_d, const double *dphi_z_d, double *fvl_d) const
CudaMemWrapper< int > gemm_lda_
Definition phi_operator_gpu.h:108
CudaMemWrapper< double * > gemm_C_
Definition phi_operator_gpu.h:116
CudaMemWrapper< int > bgrid_phi_len_
Definition phi_operator_gpu.h:99
CudaMemWrapper< const Real * > gemm_B_
Definition phi_operator_gpu.h:112
CudaMemWrapper< int2 > atoms_num_info_
Definition phi_operator_gpu.h:88
CudaMemWrapper< int > atom_phi_start_
Definition phi_operator_gpu.h:97
CudaMemWrapper< const Real * > gemm_A_
Definition phi_operator_gpu.h:111
void phi_dot_phi(const Real *phi_i_d, const double *phi_j_d, double *rho_d) const
CudaMemWrapper< int > bgrid_phi_start_
Definition phi_operator_gpu.h:101
void set_bgrid_batch(std::shared_ptr< BatchBigGrid > bgrid_batch)
void set_ddphi(double *ddphi_xx_d, double *ddphi_xy_d, double *ddphi_xz_d, double *ddphi_yy_d, double *ddphi_yz_d, double *ddphi_zz_d) const
Definition hcontainer.h:144
Definition batch_biggrid.cpp:4