ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
device.h
Go to the documentation of this file.
1#ifndef MODULE_DEVICE_H_
2#define MODULE_DEVICE_H_
3
4#include "types.h"
5#include <fstream>
6
7#ifdef __MPI
8#include "mpi.h"
9#endif
10
11namespace base_device
12{
13
14template <typename Device>
16
/**
 * @brief Describe the precision associated with the element type T.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @tparam T element type.
 * @param var pointer whose static type selects the precision
 *            (NOTE(review): presumably the pointer value itself is unused --
 *            confirm against the definition).
 * @return the precision as a string; the exact spelling is decided by the
 *         definition and is not visible here.
 */
template <typename T>
std::string get_current_precision(const T* var);
19
20namespace information
21{
22
/**
 * @brief Get the device name for source_esolver.
 * @param device_flag device selector string
 *        (NOTE(review): presumably a value produced by get_device_flag() -- confirm).
 * @return the device name.
 */
std::string get_device_name(std::string device_flag);

/**
 * @brief Get the device number for source_esolver.
 * @param device_flag device selector string
 *        (NOTE(review): presumably a value produced by get_device_flag() -- confirm).
 * @return the device number.
 */
int get_device_num(std::string device_flag);

/**
 * @brief Output the device information for source_esolver.
 * @param output stream that receives the report.
 */
void output_device_info(std::ostream& output);

/**
 * @brief Get the device kpar object for source_io GlobalV::KPAR.
 * @param kpar   requested KPAR value.
 * @param bndpar requested BNDPAR value
 *        (NOTE(review): how the two are combined is decided by the definition -- confirm).
 * @return the kpar value to use on the device.
 */
int get_device_kpar(const int& kpar, const int& bndpar);

/**
 * @brief Get the device flag object for source_io PARAM.inp.device.
 * @param device     requested device string.
 * @param basis_type basis type string.
 * @return the resolved device flag.
 */
std::string get_device_flag(const std::string& device, const std::string& basis_type);
53
// Use #ifdef (not #if) so the guard matches the one around the "mpi.h" include
// and stays well-formed when __MPI is defined without a value.
#ifdef __MPI
/**
 * @brief Get the rank of the current node.
 * @note A GPU can only be bound to a CPU in the same node.
 */
int get_node_rank();

/**
 * @brief Node-rank query taking an explicit communicator.
 * @param mpi_comm communicator to use; defaults to MPI_COMM_WORLD.
 * NOTE(review): presumably relies on an MPI shared-memory split -- confirm
 * against the definition.
 */
int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm = MPI_COMM_WORLD);

/**
 * @brief Comparator over two untyped pointers.
 * NOTE(review): the (const void*, const void*) signature suggests use as a
 * qsort callback comparing strings -- confirm against the definition.
 */
int stringCmp(const void* a, const void* b);

#ifdef __CUDA
/**
 * @brief Select a device based on the MPI rank (CUDA builds only).
 * @param mpi_comm communicator to use; defaults to MPI_COMM_WORLD.
 * NOTE(review): the meaning of the returned int is not visible here -- confirm.
 */
int set_device_by_rank(const MPI_Comm mpi_comm = MPI_COMM_WORLD);
#endif // __CUDA

#endif // __MPI
70
/**
 * @brief Generic fallback for printing device information: does nothing.
 *
 * A GPU specialization is declared further down when __CUDA or __ROCM is
 * defined; any other device type ends up here.
 *
 * @tparam Device     device tag type.
 * @param dev         device tag pointer (unused by this fallback).
 * @param ofs_device  output file stream (left untouched by this fallback).
 */
template <typename Device>
void print_device_info(const Device* dev, std::ofstream& ofs_device)
{
    // Intentionally empty: nothing to report for this device type.
}
76
/**
 * @brief Generic fallback for recording device memory usage: does nothing.
 *
 * A GPU specialization is declared further down when __CUDA or __ROCM is
 * defined; any other device type ends up here.
 *
 * @tparam Device     device tag type.
 * @param dev         device tag pointer (unused by this fallback).
 * @param ofs_device  output file stream (left untouched by this fallback).
 * @param str         label for the record (unused by this fallback); taken by
 *                    value so the signature matches the GPU specialization.
 * @param size        size to record (unused by this fallback).
 */
template <typename Device>
void record_device_memory(const Device* dev, std::ofstream& ofs_device, std::string str, size_t size)
{
    // Intentionally empty: nothing to record for this device type.
}
82
#if defined(__CUDA) || defined(__ROCM)
/**
 * @brief GPU specialization of print_device_info.
 *
 * Declared only when __CUDA or __ROCM is defined; the definition is not part
 * of this header.
 */
template <>
void print_device_info<base_device::DEVICE_GPU>(const base_device::DEVICE_GPU* dev, std::ofstream& ofs_device);

/**
 * @brief GPU specialization of record_device_memory.
 *
 * Declared only when __CUDA or __ROCM is defined; the definition is not part
 * of this header.
 */
template <>
void record_device_memory<base_device::DEVICE_GPU>(const base_device::DEVICE_GPU* dev,
                                                   std::ofstream& ofs_device,
                                                   std::string str,
                                                   size_t size);
#endif // __CUDA || __ROCM
90
91} // end of namespace information
92} // end of namespace base_device
93
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 && !defined(__CUDA_ON_DCU)
/**
 * @brief Software atomicAdd for double, built on a 64-bit atomicCAS loop.
 *
 * Compiled only for device code with __CUDA_ARCH__ below 600 and when
 * __CUDA_ON_DCU is not defined.
 *
 * @param address location of the double to update.
 * @param val     value to add to *address.
 * @return the value stored at address before the addition was applied.
 */
static __inline__ __device__ double atomicAdd(double* address, double val)
{
    // Operate on the raw 64-bit pattern so atomicCAS can be used.
    unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
    unsigned long long int old = *address_as_ull;
    unsigned long long int assumed;
    do
    {
        assumed = old;
        // Try to publish assumed + val; atomicCAS returns what was actually stored.
        old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
        // Note: uses integer comparison to avoid a hang in case of NaN
        // (since NaN != NaN).
    } while (assumed != old);
    return __longlong_as_double(old);
}
#endif
113
114#endif // MODULE_DEVICE_H_
Definition output.h:13
#define T
Definition exp.cpp:237
std::string get_device_flag(const std::string &device, const std::string &basis_type)
Get the device flag object for source_io PARAM.inp.device.
Definition device.cpp:150
void output_device_info(std::ostream &output)
Output the device information for source_esolver.
Definition output_device.cpp:107
int get_device_num(std::string device_flag)
Get the device number for source_esolver.
Definition output_device.cpp:67
void record_device_memory(const Device *dev, std::ofstream &ofs_device, std::string str, size_t size)
Definition device.h:78
int get_node_rank()
Get the rank of the current node. Note that a GPU can only be bound to a CPU in the same node.
Definition device.cpp:67
void print_device_info(const Device *dev, std::ofstream &ofs_device)
Definition device.h:72
std::string get_device_name(std::string device_flag)
Get the device name for source_esolver.
Definition output_device.cpp:27
int get_device_kpar(const int &kpar, const int &bndpar)
Get the device kpar object for source_io GlobalV::KPAR.
Definition device.cpp:199
int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm)
Definition device.cpp:114
int stringCmp(const void *a, const void *b)
Definition device.cpp:52
Definition device.cpp:21
base_device::AbacusDevice_t get_device_type(const Device *dev)
std::string get_current_precision(const float *var)
Definition device.cpp:36
AbacusDevice_t
Definition types.h:12
string device_flag
Definition pw_test.cpp:13