3#ifndef MODULE_HSOLVER_MATH_KERNEL_H
4#define MODULE_HSOLVER_MATH_KERNEL_H
13#if defined(__CUDA) || defined(__UT_USE_CUDA)
14#include <cuda_runtime.h>
25 return {x.real(), 0.0};
29 return {x.real(), 0.0};
36inline std::complex<double>
get_conj(
const std::complex<double> &x) {
37 return {x.real(), -x.imag()};
40inline std::complex<float>
get_conj(
const std::complex<float> &x) {
41 return {x.real(), -x.imag()};
// get_conj overload for a real double: there is no imaginary part to negate,
// so the argument is returned unchanged.
44inline double get_conj(
const double &x) {
return x; }
// get_conj overload for a real float: there is no imaginary part to negate,
// so the argument is returned unchanged.
46inline float get_conj(
const float &x) {
return x; }
52template <
typename FPTYPE,
typename Device>
struct scal_op {
64 const std::complex<FPTYPE> *alpha, std::complex<FPTYPE> *X,
97 void operator()(
const int& dim,
T* result,
const T* vector1,
const Real* vector2,
const bool& add =
false);
128 const Real *vector2);
132template <
typename T,
typename Device>
struct axpy_op {
146 const int &incX,
T *Y,
const int &incY);
150template <
typename T,
typename Device>
165 const Real constant1,
const T *vector2,
const Real constant2);
183 const T *psi_R,
const bool reduce =
true);
192template <
typename T,
typename Device>
struct gemv_op {
211 const int &n,
const T *alpha,
const T *A,
const int &lda,
212 const T *X,
const int &incx,
const T *beta,
T *Y,
217template <
typename T,
typename Device>
struct gemm_op {
238 const int &m,
const int &n,
const int &k,
const T *alpha,
239 const T *a,
const int &lda,
const T *b,
const int &ldb,
240 const T *beta,
T *c,
const int &ldc);
245template <
typename T,
typename Device>
struct gemm_op_mt {
265 void operator()(
const char &transa,
const char &transb,
266 const int &m,
const int &n,
const int &k,
const T *alpha,
267 const T *a,
const int &lda,
const T *b,
const int &ldb,
268 const T *beta,
T *c,
const int &ldc);
283 const T *input_matrix,
T *output_matrix);
298 void operator()(
const int& n1,
const int& n2,
const T* A,
const int& LDA,
T* B,
const int& LDB);
301template <
typename T,
typename Device>
305 void operator()(
const Device *d,
const int &nbase,
const int &nbase_x,
const int &notconv,
306 T *result,
const T *vectors,
const Real *eigenvalues);
309template <
typename T,
typename Device>
317 const Real* precondition,
318 const Real* eigenvalues);
321template <
typename T,
typename Device>
329 Real* psi_norm =
nullptr);
343#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
348 const T *psi_L,
const T *psi_R,
const bool reduce =
true);
356 void operator()(
const int dim,
T* result,
const T* vector,
const Real constant);
360template <
typename T>
struct vector_mul_vector_op<
T,
base_device::DEVICE_GPU> {
362 void operator()(
const int& dim,
T* result,
const T* vector1,
const Real* vector2,
const bool& add =
false);
366template <
typename T>
struct vector_div_constant_op<
T,
base_device::DEVICE_GPU> {
368 void operator()(
const int& dim,
T* result,
const T* vector,
const Real constant);
372template <
typename T>
struct vector_div_vector_op<
T,
base_device::DEVICE_GPU> {
375 const T *vector1,
const Real *vector2);
380struct vector_add_vector_op<
T,
base_device::DEVICE_GPU> {
383 const T *vector1,
const Real constant1,
const T *vector2,
384 const Real constant2);
387template <
typename T>
struct matrixCopy<
T,
base_device::DEVICE_GPU> {
396void createGpuBlasHandle();
397void destoryBLAShandle();
400template <
typename T>
struct apply_eigenvalues_op<
T,
base_device::DEVICE_GPU> {
403 void operator()(
const base_device::DEVICE_GPU *d,
const int &nbase,
const int &nbase_x,
const int &notconv,
404 T *result,
const T *vectors,
const Real *eigenvalues);
410 void operator()(
const base_device::DEVICE_GPU* d,
415 const Real* precondition,
416 const Real* eigenvalues);
#define N
Definition exp.cpp:24
#define T
Definition exp.cpp:237
Definition array_pool.h:6
std::complex< double > set_real_tocomplex(const std::complex< double > &x)
Definition math_kernel_op.h:24
std::complex< double > get_conj(const std::complex< double > &x)
Definition math_kernel_op.h:36
T type
Definition macros.h:8
Definition math_kernel_op.h:302
void operator()(const Device *d, const int &nbase, const int &nbase_x, const int &notconv, T *result, const T *vectors, const Real *eigenvalues)
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:303
Definition math_kernel_op.h:132
void operator()(const int &N, const T *alpha, const T *X, const int &incX, T *Y, const int &incY)
Y = alpha * X + Y.
Definition math_kernel_op.h:168
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:169
Real operator()(const int &dim, const T *psi_L, const T *psi_R, const bool reduce=true)
dot_real_op computes the dot product of the given complex arrays (treated as float arrays)....
Definition math_kernel_op.h:217
void operator()(const char &transa, const char &transb, const int &m, const int &n, const int &k, const T *alpha, const T *a, const int &lda, const T *b, const int &ldb, const T *beta, T *c, const int &ldc)
C = alpha * op(A) * op(B) + beta * C.
Definition math_kernel_op.h:192
void operator()(const char &trans, const int &m, const int &n, const T *alpha, const T *A, const int &lda, const T *X, const int &incx, const T *beta, T *Y, const int &incy)
y = alpha * op(A) * x + beta * y
Definition math_kernel_op.h:286
void operator()(const int &n1, const int &n2, const T *A, const int &LDA, T *B, const int &LDB)
copy matrix A to B, they can have different leading dimensions
Definition math_kernel_op.h:272
void operator()(const int &row, const int &col, const T *input_matrix, T *output_matrix)
transpose the input matrix
void operator()(const base_device::DEVICE_GPU *d, const int &dim, T *psi_iter, const int &nbase, const int &notconv, Real *psi_norm)
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:334
Definition math_kernel_op.h:322
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:323
void operator()(const Device *d, const int &dim, T *psi_iter, const int &nbase, const int &notconv, Real *psi_norm=nullptr)
Definition math_kernel_op.h:310
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:311
void operator()(const Device *d, const int &dim, T *psi_iter, const int &nbase, const int &notconv, const Real *precondition, const Real *eigenvalues)
Definition math_kernel_op.h:52
void operator()(const int &N, const std::complex< FPTYPE > *alpha, std::complex< FPTYPE > *X, const int &incx)
x = alpha * x, where alpha and x are complex numbers
Definition math_kernel_op.h:151
void operator()(const int &dim, T *result, const T *vector1, const Real constant1, const T *vector2, const Real constant2)
result[i] = vector1[i] * constant1 + vector2[i] * constant2
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:152
Definition math_kernel_op.h:101
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:102
void operator()(const int &dim, T *result, const T *vector, const Real constant)
result[i] = vector[i] / constant
Definition math_kernel_op.h:116
void operator()(const int &dim, T *result, const T *vector1, const Real *vector2)
result[i] = vector1[i](complex) / vector2[i](not complex)
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:117
Definition math_kernel_op.h:68
void operator()(const int dim, T *result, const T *vector, const Real constant)
result[i] = vector[i] * constant, where vector is a complex number and constant is a real number. It is d...
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:69
Definition math_kernel_op.h:85
typename GetTypeReal< T >::type Real
Definition math_kernel_op.h:86
void operator()(const int &dim, T *result, const T *vector1, const Real *vector2, const bool &add=false)
result[i] = vector1[i](complex) * vector2[i](not complex)