1#ifndef MODULE_DEVICE_MEMORY_H_
2#define MODULE_DEVICE_MEMORY_H_
15template <
typename FPTYPE,
typename Device>
26 void operator()(FPTYPE*& arr,
const size_t size,
const char* record_in =
nullptr);
29template <
typename FPTYPE,
typename Device>
40 void operator()(FPTYPE* arr,
const int var,
const size_t size);
43template <
typename FPTYPE,
typename Device>
56 void operator()(FPTYPE* arr,
const size_t pitch,
const int var,
const size_t width,
const size_t height);
59template <
typename FPTYPE,
typename Device_out,
typename Device_in>
75template <
typename FPTYPE,
typename Device_out,
typename Device_in>
97template <
typename FPTYPE_out,
typename FPTYPE_in,
typename Device_out,
typename Device_in>
109 const FPTYPE_in* arr_in,
113template <
typename FPTYPE,
typename Device>
123template <
typename FPTYPE>
126template <
typename FPTYPE>
129template <
typename FPTYPE>
132template <
typename FPTYPE_out,
typename FPTYPE_in>
135template <
typename FPTYPE>
138#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
140template <
typename FPTYPE>
145 const char* record_in =
nullptr);
148template <
typename FPTYPE>
149struct set_memory_op<FPTYPE,
base_device::DEVICE_GPU>
151 void operator()(FPTYPE* arr,
const int var,
const size_t size);
154template <
typename FPTYPE>
155struct set_memory_2d_op<FPTYPE,
base_device::DEVICE_GPU>
157 void operator()(FPTYPE* arr,
const size_t pitch,
const int var,
const size_t width,
const size_t height);
160template <
typename FPTYPE>
161struct synchronize_memory_op<FPTYPE,
base_device::DEVICE_CPU, base_device::DEVICE_GPU>
164 const FPTYPE* arr_in,
167template <
typename FPTYPE>
168struct synchronize_memory_op<FPTYPE,
base_device::DEVICE_GPU, base_device::DEVICE_CPU>
171 const FPTYPE* arr_in,
174template <
typename FPTYPE>
175struct synchronize_memory_op<FPTYPE,
base_device::DEVICE_GPU, base_device::DEVICE_GPU>
178 const FPTYPE* arr_in,
183template <
typename FPTYPE>
184struct synchronize_memory_2d_op<FPTYPE,
base_device::DEVICE_CPU, base_device::DEVICE_GPU>
188 const FPTYPE* arr_in,
191 const size_t height);
193template <
typename FPTYPE>
194struct synchronize_memory_2d_op<FPTYPE,
base_device::DEVICE_GPU, base_device::DEVICE_CPU>
198 const FPTYPE* arr_in,
201 const size_t height);
203template <
typename FPTYPE>
204struct synchronize_memory_2d_op<FPTYPE,
base_device::DEVICE_GPU, base_device::DEVICE_GPU>
208 const FPTYPE* arr_in,
211 const size_t height);
214template <
typename FPTYPE>
215struct delete_memory_op<FPTYPE,
base_device::DEVICE_GPU>
226void set_dsp_cluster_id(
int id);
229int get_dsp_cluster_id();
231template <
typename FPTYPE,
typename Device>
232struct resize_memory_op_mt
242 void operator()(FPTYPE*& arr,
const size_t size,
const char* record_in =
nullptr);
245template <
typename FPTYPE,
typename Device>
246struct set_memory_op_mt
256 void operator()(FPTYPE* arr,
const int var,
const size_t size);
259template <
typename FPTYPE,
typename Device>
260struct delete_memory_op_mt
266 void operator()(FPTYPE* arr);
380 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
382 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
384 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
386 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
388 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
390 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
392static base_device::DEVICE_CPU* cpu_ctx = {};
393static base_device::DEVICE_GPU* gpu_ctx = {};
void cast_memory(FPTYPE_out *arr_out, const FPTYPE_in *arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in)
Definition memory_op.cpp:567
void synchronize_memory(FPTYPE *arr_out, const FPTYPE *arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in)
Definition memory_op.cpp:551
void resize_memory(FPTYPE *arr, const size_t size, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:530
void set_memory(FPTYPE *arr, const int var, const size_t size, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:541
void delete_memory(FPTYPE *arr, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:584
AbacusDevice_t
Definition types.h:12
@ CpuDevice
Definition types.h:14
Definition memory_op.h:99
void operator()(FPTYPE_out *arr_out, const FPTYPE_in *arr_in, const size_t size)
memcpy for multi-device
Definition memory_op.h:115
void operator()(FPTYPE *arr)
free memory for multi-device
Definition memory_op.h:17
void operator()(FPTYPE *&arr, const size_t size, const char *record_in=nullptr)
Allocate memory for a given pointer. Note this op will free the pointer first.
Definition memory_op.h:45
void operator()(FPTYPE *arr, const size_t pitch, const int var, const size_t width, const size_t height)
memset2D for multi-device
Definition memory_op.h:31
void operator()(FPTYPE *arr, const int var, const size_t size)
memset for multi-device
Definition memory_op.h:77
void operator()(FPTYPE *arr_out, const size_t dpitch, const FPTYPE *arr_in, const size_t spitch, const size_t width, const size_t height)
memcpy2D for multi-device
Definition memory_op.h:61
void operator()(FPTYPE *arr_out, const FPTYPE *arr_in, const size_t size)
memcpy for multi-device