abacus-develop/phi__operator__gpu_8h_source.html

#pragma once

#include <memory>

#include <cuda_runtime.h>


#include "source_lcao/module_gint/batch_biggrid.h"

#include "gint_gpu_vars.h"

#include "cuda_mem_wrapper.h"


namespace ModuleGint

{


/**
 * @brief GPU-side operator on the phi arrays of one batch of big grids.
 *
 * The class keeps shared ownership of the GPU variables (GintGpuVars) and of
 * the current batch of big grids (BatchBigGrid), a CUDA stream and event, the
 * per-batch index tables, and the argument arrays of the batched GEMM calls.
 *
 * @tparam Real Element type of the phi arrays and of the GEMM inputs
 *              (defaults to double). The force/stress paths and all GEMM
 *              accumulators stay double regardless of Real.
 *
 * NOTE(review): parameters carrying the `_d` suffix are presumably device
 * pointers, and the CudaMemWrapper members presumably own device buffers —
 * confirm against the implementation file and cuda_mem_wrapper.h.
 *
 * NOTE(review): a destructor is declared and a raw cudaEvent_t is stored, but
 * no copy operations are declared here. Whether copying an instance is
 * possible/safe depends on CudaMemWrapper and on the out-of-view
 * constructor/destructor — confirm, and consider deleting the copy
 * operations explicitly.
 */
template<typename Real = double>
class PhiOperatorGpu
{
public:
    /// @param gint_gpu_vars  Shared GPU variables, stored in gint_gpu_vars_.
    /// @param stream         CUDA stream handle; defaults to 0 (the default stream).
    PhiOperatorGpu(std::shared_ptr<const GintGpuVars> gint_gpu_vars, cudaStream_t stream = 0);
    ~PhiOperatorGpu();

    /// Takes the batch of big grids to work on.
    /// NOTE(review): presumably (re)fills the per-batch index tables declared
    /// below — confirm in the implementation.
    void set_bgrid_batch(std::shared_ptr<BatchBigGrid> bgrid_batch);

    /// Writes phi into phi_d, in the precision selected by Real.
    void set_phi(Real* phi_d) const;

    // These remain double-only (for force/stress paths)
    void set_phi_dphi(double* phi_d, double* dphi_x_d, double* dphi_y_d, double* dphi_z_d) const;

    void set_ddphi(double* ddphi_xx_d, double* ddphi_xy_d, double* ddphi_xz_d,
                   double* ddphi_yy_d, double* ddphi_yz_d, double* ddphi_zz_d) const;

    /// Combines phi_d with vl_d and the scalar dr3 and writes to result_d.
    /// NOTE(review): presumably result = phi * vl * dr3 per mesh grid — confirm.
    void phi_mul_vldr3(
        const Real* vl_d,
        const Real dr3,
        const Real* phi_d,
        Real* result_d) const;

    // All GEMM accumulators (hr in phi_mul_phi, phi_dm in phi_mul_dm) are
    // double-typed regardless of Real: when Real=float the multiplies stay in
    // fp32 (cheap) but per-block reductions and device-side atomicAdd run in
    // fp64 so the global reductions don't drift.
    void phi_mul_phi(
        const Real* phi_d,
        const Real* phi_vldr3_d,
        HContainer<double>& hRGint,
        double* hr_d) const;

    // Unlike the other operations, this member function is not declared const.
    void phi_mul_dm(
        const Real* phi_d,
        const Real* dm_d,
        const HContainer<Real>& dm,
        const bool is_symm,
        double* phi_dm_d);

    // phi_j_d is the output of phi_mul_dm and therefore always double.
    void phi_dot_phi(
        const Real* phi_i_d,
        const double* phi_j_d,
        double* rho_d) const;

    // These remain double-only (for force/stress paths)
    void phi_dot_dphi(
        const double* phi_d,
        const double* dphi_x_d,
        const double* dphi_y_d,
        const double* dphi_z_d,
        double* fvl_d) const;

    void phi_dot_dphi_r(
        const double* phi_d,
        const double* dphi_x_d,
        const double* dphi_y_d,
        const double* dphi_z_d,
        double* svl_d) const;

private:
    std::shared_ptr<BatchBigGrid> bgrid_batch_;
    std::shared_ptr<const GintGpuVars> gint_gpu_vars_;

    // Scalar and handle members carry default member initializers so that none
    // of them is left indeterminate if a constructor does not assign it; a
    // constructor mem-initializer still takes precedence over these defaults.

    // the number of meshgrids on a biggrid
    int mgrids_num_ = 0;

    int phi_len_ = 0;

    cudaStream_t stream_ = 0;
    // Null until an event is stored here, so the handle never holds garbage.
    cudaEvent_t event_ = nullptr;

    // The first number in every group of two represents the number of atoms on that bigcell.
    // The second number represents the cumulative number of atoms up to that bigcell.
    CudaMemWrapper<int2> atoms_num_info_;

    // the iat of each atom
    CudaMemWrapper<int> atoms_iat_;

    // atoms_bgrids_rcoords_ here represents the relative coordinates from the big grid to the atoms
    CudaMemWrapper<double3> atoms_bgrids_rcoords_;

    // the start index of the phi array for each atom
    CudaMemWrapper<int> atom_phi_start_;
    // The length of phi for a single meshgrid on each big grid.
    CudaMemWrapper<int> bgrid_phi_len_;
    // The start index of the phi array for each big grid.
    CudaMemWrapper<int> bgrid_phi_start_;
    // Mapping of the index of meshgrid in the batch of biggrids to the index of meshgrid in the local cell
    CudaMemWrapper<int> batch_mgrid_lidx_;

    // Argument arrays of the batched GEMM calls. They are mutable so that the
    // const member functions above can refill them.
    mutable CudaMemWrapper<int> gemm_m_;
    mutable CudaMemWrapper<int> gemm_n_;
    mutable CudaMemWrapper<int> gemm_k_;
    mutable CudaMemWrapper<int> gemm_lda_;
    mutable CudaMemWrapper<int> gemm_ldb_;
    mutable CudaMemWrapper<int> gemm_ldc_;
    mutable CudaMemWrapper<const Real*> gemm_A_;
    mutable CudaMemWrapper<const Real*> gemm_B_;
    // Single C-pointer buffer: both phi_mul_phi (output hr) and phi_mul_dm
    // (output phi_dm) write into double* accumulators, so a single shared
    // gemm_C_ device buffer can serve both call sites.
    mutable CudaMemWrapper<double*> gemm_C_;
    mutable CudaMemWrapper<Real> gemm_alpha_;
};


}

batch_biggrid.h

CudaMemWrapper
Definition cuda_mem_wrapper.h:8

ModuleGint::PhiOperatorGpu
Definition phi_operator_gpu.h:14

ModuleGint::PhiOperatorGpu::bgrid_batch_
std::shared_ptr< BatchBigGrid > bgrid_batch_
Definition phi_operator_gpu.h:75

ModuleGint::PhiOperatorGpu::gemm_ldb_
CudaMemWrapper< int > gemm_ldb_
Definition phi_operator_gpu.h:109

ModuleGint::PhiOperatorGpu::atoms_iat_
CudaMemWrapper< int > atoms_iat_
Definition phi_operator_gpu.h:91

ModuleGint::PhiOperatorGpu::gemm_m_
CudaMemWrapper< int > gemm_m_
Definition phi_operator_gpu.h:105

ModuleGint::PhiOperatorGpu::PhiOperatorGpu
PhiOperatorGpu(std::shared_ptr< const GintGpuVars > gint_gpu_vars, cudaStream_t stream=0)

ModuleGint::PhiOperatorGpu::set_phi
void set_phi(Real *phi_d) const

ModuleGint::PhiOperatorGpu::gemm_ldc_
CudaMemWrapper< int > gemm_ldc_
Definition phi_operator_gpu.h:110

ModuleGint::PhiOperatorGpu::gint_gpu_vars_
std::shared_ptr< const GintGpuVars > gint_gpu_vars_
Definition phi_operator_gpu.h:76

ModuleGint::PhiOperatorGpu::phi_dot_dphi_r
void phi_dot_dphi_r(const double *phi_d, const double *dphi_x_d, const double *dphi_y_d, const double *dphi_z_d, double *svl_d) const

ModuleGint::PhiOperatorGpu::phi_mul_dm
void phi_mul_dm(const Real *phi_d, const Real *dm_d, const HContainer< Real > &dm, const bool is_symm, double *phi_dm_d)

ModuleGint::PhiOperatorGpu::gemm_k_
CudaMemWrapper< int > gemm_k_
Definition phi_operator_gpu.h:107

ModuleGint::PhiOperatorGpu::atoms_bgrids_rcoords_
CudaMemWrapper< double3 > atoms_bgrids_rcoords_
Definition phi_operator_gpu.h:94

ModuleGint::PhiOperatorGpu::phi_mul_vldr3
void phi_mul_vldr3(const Real *vl_d, const Real dr3, const Real *phi_d, Real *result_d) const

ModuleGint::PhiOperatorGpu::phi_len_
int phi_len_
Definition phi_operator_gpu.h:81

ModuleGint::PhiOperatorGpu::set_phi_dphi
void set_phi_dphi(double *phi_d, double *dphi_x_d, double *dphi_y_d, double *dphi_z_d) const

ModuleGint::PhiOperatorGpu::gemm_alpha_
CudaMemWrapper< Real > gemm_alpha_
Definition phi_operator_gpu.h:117

ModuleGint::PhiOperatorGpu::phi_mul_phi
void phi_mul_phi(const Real *phi_d, const Real *phi_vldr3_d, HContainer< double > &hRGint, double *hr_d) const

ModuleGint::PhiOperatorGpu::stream_
cudaStream_t stream_
Definition phi_operator_gpu.h:83

ModuleGint::PhiOperatorGpu::gemm_n_
CudaMemWrapper< int > gemm_n_
Definition phi_operator_gpu.h:106

ModuleGint::PhiOperatorGpu::event_
cudaEvent_t event_
Definition phi_operator_gpu.h:84

ModuleGint::PhiOperatorGpu::batch_mgrid_lidx_
CudaMemWrapper< int > batch_mgrid_lidx_
Definition phi_operator_gpu.h:103

ModuleGint::PhiOperatorGpu::mgrids_num_
int mgrids_num_
Definition phi_operator_gpu.h:79

ModuleGint::PhiOperatorGpu::phi_dot_dphi
void phi_dot_dphi(const double *phi_d, const double *dphi_x_d, const double *dphi_y_d, const double *dphi_z_d, double *fvl_d) const

ModuleGint::PhiOperatorGpu::gemm_lda_
CudaMemWrapper< int > gemm_lda_
Definition phi_operator_gpu.h:108

ModuleGint::PhiOperatorGpu::~PhiOperatorGpu
~PhiOperatorGpu()

ModuleGint::PhiOperatorGpu::gemm_C_
CudaMemWrapper< double * > gemm_C_
Definition phi_operator_gpu.h:116

ModuleGint::PhiOperatorGpu::bgrid_phi_len_
CudaMemWrapper< int > bgrid_phi_len_
Definition phi_operator_gpu.h:99

ModuleGint::PhiOperatorGpu::gemm_B_
CudaMemWrapper< const Real * > gemm_B_
Definition phi_operator_gpu.h:112

ModuleGint::PhiOperatorGpu::atoms_num_info_
CudaMemWrapper< int2 > atoms_num_info_
Definition phi_operator_gpu.h:88

ModuleGint::PhiOperatorGpu::atom_phi_start_
CudaMemWrapper< int > atom_phi_start_
Definition phi_operator_gpu.h:97

ModuleGint::PhiOperatorGpu::gemm_A_
CudaMemWrapper< const Real * > gemm_A_
Definition phi_operator_gpu.h:111

ModuleGint::PhiOperatorGpu::phi_dot_phi
void phi_dot_phi(const Real *phi_i_d, const double *phi_j_d, double *rho_d) const

ModuleGint::PhiOperatorGpu::bgrid_phi_start_
CudaMemWrapper< int > bgrid_phi_start_
Definition phi_operator_gpu.h:101

ModuleGint::PhiOperatorGpu::set_bgrid_batch
void set_bgrid_batch(std::shared_ptr< BatchBigGrid > bgrid_batch)

ModuleGint::PhiOperatorGpu::set_ddphi
void set_ddphi(double *ddphi_xx_d, double *ddphi_xy_d, double *ddphi_xz_d, double *ddphi_yy_d, double *ddphi_yz_d, double *ddphi_zz_d) const

hamilt::HContainer
Definition hcontainer.h:144

cuda_mem_wrapper.h

gint_gpu_vars.h

ModuleGint
Definition batch_biggrid.cpp:4