3#ifndef MODULE_HSOLVER_MATH_KERNEL_H
4#define MODULE_HSOLVER_MATH_KERNEL_H
12#if defined(__CUDA) || defined(__UT_USE_CUDA)
13#include <cuda_runtime.h>
24 return {x.real(), 0.0};
28 return {x.real(), 0.0};
/**
 * @brief Complex conjugate of a double-precision complex number.
 *
 * @param x Input value.
 * @return A complex number with the same real part as x and the negated
 *         imaginary part.
 */
inline std::complex<double> get_conj(const std::complex<double> &x) {
    return {x.real(), -x.imag()};
}
/**
 * @brief Complex conjugate of a single-precision complex number.
 *
 * @param x Input value.
 * @return A complex number with the same real part as x and the negated
 *         imaginary part.
 */
inline std::complex<float> get_conj(const std::complex<float> &x) {
    return {x.real(), -x.imag()};
}
/**
 * @brief Real-valued overload of get_conj for double.
 *
 * A real number has no imaginary part to negate, so the input is returned
 * unchanged.
 *
 * @param x Input value.
 * @return x.
 */
inline double get_conj(const double &x) {
    return x;
}
/**
 * @brief Real-valued overload of get_conj for float.
 *
 * A real number has no imaginary part to negate, so the input is returned
 * unchanged.
 *
 * @param x Input value.
 * @return x.
 */
inline float get_conj(const float &x) {
    return x;
}
51template <
typename FPTYPE,
typename Device>
struct scal_op {
63 const std::complex<FPTYPE> *alpha, std::complex<FPTYPE> *X,
96 void operator()(
const int& dim,
T* result,
const T* vector1,
const Real* vector2,
const bool& add =
false);
127 const Real *vector2);
131template <
typename T,
typename Device>
struct axpy_op {
145 const int &incX,
T *Y,
const int &incY);
149template <
typename T,
typename Device>
164 const Real constant1,
const T *vector2,
const Real constant2);
182 const T *psi_R,
const bool reduce =
true);
191template <
typename T,
typename Device>
struct gemv_op {
210 const int &n,
const T *alpha,
const T *A,
const int &lda,
211 const T *X,
const int &incx,
const T *beta,
T *Y,
216template <
typename T,
typename Device>
struct gemm_op {
237 const int &m,
const int &n,
const int &k,
const T *alpha,
238 const T *a,
const int &lda,
const T *b,
const int &ldb,
239 const T *beta,
T *c,
const int &ldc);
244template <
typename T,
typename Device>
struct gemv_op_mt {
262 void operator()(
const char &trans,
const int &m,
263 const int &n,
const T *alpha,
const T *A,
const int &lda,
264 const T *X,
const int &incx,
const T *beta,
T *Y,
269template <
typename T,
typename Device>
struct gemm_op_mt {
289 void operator()(
const char &transa,
const char &transb,
290 const int &m,
const int &n,
const int &k,
const T *alpha,
291 const T *a,
const int &lda,
const T *b,
const int &ldb,
292 const T *beta,
T *c,
const int &ldc);
307 const T *input_matrix,
T *output_matrix);
322 void operator()(
const int& n1,
const int& n2,
const T* A,
const int& LDA,
T* B,
const int& LDB);
325template <
typename T,
typename Device>
350template <
typename T,
typename Device>
354 void operator()(
const Device *d,
const int &nbase,
const int &nbase_x,
const int &notconv,
355 T *result,
const T *vectors,
const Real *eigenvalues);
358template <
typename T,
typename Device>
366 const Real* precondition,
367 const Real* eigenvalues);
370template <
typename T,
typename Device>
378 Real* psi_norm =
nullptr);
392#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
397 const T *psi_L,
const T *psi_R,
const bool reduce =
true);
405 void operator()(
const int dim,
T* result,
const T* vector,
const Real constant);
409template <
typename T>
struct vector_mul_vector_op<
T,
base_device::DEVICE_GPU> {
411 void operator()(
const int& dim,
T* result,
const T* vector1,
const Real* vector2,
const bool& add =
false);
415template <
typename T>
struct vector_div_constant_op<
T,
base_device::DEVICE_GPU> {
417 void operator()(
const int& dim,
T* result,
const T* vector,
const Real constant);
421template <
typename T>
struct vector_div_vector_op<
T,
base_device::DEVICE_GPU> {
424 const T *vector1,
const Real *vector2);
429struct vector_add_vector_op<
T,
base_device::DEVICE_GPU> {
432 const T *vector1,
const Real constant1,
const T *vector2,
433 const Real constant2);
436template <
typename T>
struct matrixCopy<
T,
base_device::DEVICE_GPU> {
445template <
typename T>
struct matrix_mul_vector_op<
T,
base_device::DEVICE_GPU> {
456void createGpuBlasHandle();
457void destoryBLAShandle();
460template <
typename T>
struct apply_eigenvalues_op<
T,
base_device::DEVICE_GPU> {
463 void operator()(
const base_device::DEVICE_GPU *d,
const int &nbase,
const int &nbase_x,
const int &notconv,
464 T *result,
const T *vectors,
const Real *eigenvalues);
470 void operator()(
const base_device::DEVICE_GPU* d,
475 const Real* precondition,
476 const Real* eigenvalues);
#define N
Definition exp.cpp:24
#define T
Definition exp.cpp:237
Definition clebsch_gordan_coeff.cpp:8
std::complex< double > set_real_tocomplex(const std::complex< double > &x)
Definition math_kernel_op.h:23
std::complex< double > get_conj(const std::complex< double > &x)
Definition math_kernel_op.h:35
T type
Definition macros.h:8
Definition math_kernel_op.h:351
void operator()(const Device *d, const int &nbase, const int &nbase_x, const int &notconv, T *result, const T *vectors, const Real *eigenvalues)
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:352
Definition math_kernel_op.h:131
void operator()(const int &N, const T *alpha, const T *X, const int &incX, T *Y, const int &incY)
Y = alpha * X + Y.
Definition math_kernel_op.h:167
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:168
Real operator()(const int &dim, const T *psi_L, const T *psi_R, const bool reduce=true)
dot_real_op computes the dot product of the given complex arrays(treated as float arrays)....
Definition math_kernel_op.h:216
void operator()(const char &transa, const char &transb, const int &m, const int &n, const int &k, const T *alpha, const T *a, const int &lda, const T *b, const int &ldb, const T *beta, T *c, const int &ldc)
C = alpha * op(A) * op(B) + beta * C.
Definition math_kernel_op.h:191
void operator()(const char &trans, const int &m, const int &n, const T *alpha, const T *A, const int &lda, const T *X, const int &incx, const T *beta, T *Y, const int &incy)
y = alpha * op(A) * x + beta * y
Definition math_kernel_op.h:310
void operator()(const int &n1, const int &n2, const T *A, const int &LDA, T *B, const int &LDB)
copy matrix A to B, they can have different leading dimensions
Definition math_kernel_op.h:296
void operator()(const int &row, const int &col, const T *input_matrix, T *output_matrix)
transpose the input matrix
Definition math_kernel_op.h:326
void operator()(const int &m, const int &n, T *a, const int &lda, const Real *b, const Real alpha, T *c, const int &ldc)
a * b * alpha by each column
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:327
void operator()(const base_device::DEVICE_GPU *d, const int &dim, T *psi_iter, const int &nbase, const int &notconv, Real *psi_norm)
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:383
Definition math_kernel_op.h:371
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:372
void operator()(const Device *d, const int &dim, T *psi_iter, const int &nbase, const int &notconv, Real *psi_norm=nullptr)
Definition math_kernel_op.h:359
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:360
void operator()(const Device *d, const int &dim, T *psi_iter, const int &nbase, const int &notconv, const Real *precondition, const Real *eigenvalues)
Definition math_kernel_op.h:51
void operator()(const int &N, const std::complex< FPTYPE > *alpha, std::complex< FPTYPE > *X, const int &incx)
x = alpha * x, where alpha and x are complex numbers
Definition math_kernel_op.h:150
void operator()(const int &dim, T *result, const T *vector1, const Real constant1, const T *vector2, const Real constant2)
result[i] = vector1[i] * constant1 + vector2[i] * constant2
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:151
Definition math_kernel_op.h:100
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:101
void operator()(const int &dim, T *result, const T *vector, const Real constant)
result[i] = vector[i] / constant
Definition math_kernel_op.h:115
void operator()(const int &dim, T *result, const T *vector1, const Real *vector2)
result[i] = vector1[i](complex) / vector2[i](not complex)
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:116
Definition math_kernel_op.h:67
void operator()(const int dim, T *result, const T *vector, const Real constant)
result[i] = vector[i] * constant, where vector is a complex number and constant is a real number. It is d...
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:68
Definition math_kernel_op.h:84
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:85
void operator()(const int &dim, T *result, const T *vector1, const Real *vector2, const bool &add=false)
result[i] = vector1[i](complex) * vector2[i](not complex)