ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
device.h
Go to the documentation of this file.
1#ifndef MODULE_DEVICE_H_
2#define MODULE_DEVICE_H_
3
4#include "types.h"
5#include <fstream>
6
7#ifdef __MPI
8#include "mpi.h"
9#endif
10
11namespace base_device
12{
13
// Declaration only: returns a base_device::AbacusDevice_t for the given
// Device pointer (presumably identifying the device kind -- confirm).
// NOTE(review): listing line 15 was absent from this rendering, which left the
// template header dangling; the signature below is taken from this page's
// symbol index -- confirm against the real device.h.
14template <typename Device>
15base_device::AbacusDevice_t get_device_type(const Device* dev);
16
// Declaration only: returns a std::string for the pointee type T of `var`
// (presumably a precision label -- confirm against device.cpp, where the
// symbol index of this page lists a `const float*` variant).
17template <typename T>
18std::string get_current_precision(const T* var);
19
20namespace information
21{
22
// Get the device name for source_esolver.
27std::string get_device_name(std::string device_flag);
28
// Get the device number for source_esolver.
33int get_device_num(std::string device_flag);
34
// Output the device information for source_esolver.
39void output_device_info(std::ostream& output);
40
// Get the device kpar object for source_io GlobalV::KPAR.
45int get_device_kpar(const int& kpar, const int& bndpar);
46
52
// Get the device flag object for source_io PARAM.inp.device.
57std::string get_device_flag(const std::string& device,
58 const std::string& basis_type);
59
// MPI-only helpers (declarations only; definitions live outside this header).
// Guarded with #ifdef so it agrees with the `#ifdef __MPI` that guards the
// "mpi.h" include at the top of this header; `#if __MPI` is ill-formed when
// __MPI is defined without a value.
60#ifdef __MPI
// Get the rank of the current node. Note that a GPU can only be bound to a
// CPU in the same node.
67int get_node_rank();
// Same idea, but takes the communicator explicitly (defaults to MPI_COMM_WORLD).
// NOTE(review): the "shared" in the name suggests MPI shared-memory splitting -- confirm.
68int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm = MPI_COMM_WORLD);
// Comparator with the (const void*, const void*) -> int signature.
69int stringCmp(const void* a, const void* b);
70
// Only declared when building with CUDA as well as MPI.
71#ifdef __CUDA
72int set_device_by_rank(const MPI_Comm mpi_comm = MPI_COMM_WORLD);
73#endif
74
75#endif
76
// Generic fallback of print_device_info: a no-op for an arbitrary Device type.
//   dev        - device tag pointer; unused here, it only selects the template.
//   ofs_device - output file stream; nothing is written to it by this version.
// An explicit specialization for base_device::DEVICE_GPU is declared later in
// this header when __CUDA or __ROCM is defined.
77template <typename Device>
78void print_device_info(const Device* dev, std::ofstream& ofs_device)
79{
80 return; // intentionally empty
81}
82
// Generic fallback of record_device_memory: a no-op for an arbitrary Device type.
//   dev        - device tag pointer; unused here, it only selects the template.
//   ofs_device - output file stream; nothing is written to it by this version.
//   str, size  - accepted but ignored by this version.
// An explicit specialization for base_device::DEVICE_GPU is declared later in
// this header when __CUDA or __ROCM is defined.
83template <typename Device>
84void record_device_memory(const Device* dev, std::ofstream& ofs_device, std::string str, size_t size)
85{
86 return; // intentionally empty
87}
88
// Explicit specializations for base_device::DEVICE_GPU, declared only when
// building with CUDA or ROCm. Only the declarations appear in this listing;
// they replace the no-op generic templates above for the GPU device type.
89#if defined(__CUDA) || defined(__ROCM)
90template <>
91void print_device_info<base_device::DEVICE_GPU>(const base_device::DEVICE_GPU *ctx, std::ofstream &ofs_device);
92
93template <>
94void record_device_memory<base_device::DEVICE_GPU>(const base_device::DEVICE_GPU* dev, std::ofstream& ofs_device, std::string str, size_t size);
95#endif
96
97} // end of namespace information
98} // end of namespace base_device
99
// Software fallback for atomicAdd on double, built from a 64-bit atomicCAS loop.
// Compiled only for device code where __CUDA_ARCH__ < 600 and __CUDA_ON_DCU is
// not defined (presumably because newer architectures provide a native double
// atomicAdd -- see the CUDA programming guide).
//   address - location to add to.
//   val     - value to add.
// Returns the double reinterpreted from the last value atomicCAS reported for
// *address, i.e. the value present before this call's addition took effect.
104#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 && !defined(__CUDA_ON_DCU)
105static __inline__ __device__ double atomicAdd(double* address, double val)
106{
    // View the double's storage as a 64-bit integer so atomicCAS can operate on it.
107 unsigned long long int* address_as_ull = (unsigned long long int*)address;
108 unsigned long long int old = *address_as_ull, assumed;
109 do
110 {
111 assumed = old;
    // Try to publish (assumed + val); atomicCAS yields what was actually stored.
112 old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
113 // Note: the loop condition compares the integer bit patterns, not doubles,
114 // to avoid a hang in case of NaN (since NaN != NaN).
115 } while (assumed != old);
116 return __longlong_as_double(old);
117}
118#endif
119
120#endif // MODULE_DEVICE_H_
Definition output.h:13
#define T
Definition exp.cpp:237
bool probe_gpu_availability()
Safely probes for GPU availability without exiting on error.
Definition device.cpp:149
std::string get_device_flag(const std::string &device, const std::string &basis_type)
Get the device flag object for source_io PARAM.inp.device.
Definition device.cpp:171
void output_device_info(std::ostream &output)
Output the device information for source_esolver.
Definition output_device.cpp:108
int get_device_num(std::string device_flag)
Get the device number for source_esolver.
Definition output_device.cpp:68
void record_device_memory(const Device *dev, std::ofstream &ofs_device, std::string str, size_t size)
Definition device.h:84
int get_node_rank()
Get the rank of the current node. Note that a GPU can only be bound to a CPU in the same node.
Definition device.cpp:66
void print_device_info(const Device *dev, std::ofstream &ofs_device)
Definition device.h:78
std::string get_device_name(std::string device_flag)
Get the device name for source_esolver.
Definition output_device.cpp:28
int get_device_kpar(const int &kpar, const int &bndpar)
Get the device kpar object for source_io GlobalV::KPAR.
Definition device.cpp:214
int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm)
Definition device.cpp:113
int stringCmp(const void *a, const void *b)
Definition device.cpp:51
Definition device.cpp:20
base_device::AbacusDevice_t get_device_type(const Device *dev)
std::string get_current_precision(const float *var)
Definition device.cpp:35
AbacusDevice_t
Definition types.h:12
string device_flag
Definition pw_test.cpp:13