ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
memory_op.h
Go to the documentation of this file.
1#ifndef MODULE_DEVICE_MEMORY_H_
2#define MODULE_DEVICE_MEMORY_H_
3
4#include "types.h"
5
6#include <complex>
7#include <cstddef>
8
9namespace base_device
10{
11
12namespace memory
13{
14
15template <typename FPTYPE, typename Device>
16struct resize_memory_op
17{
26 void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr);
27};
28
29template <typename FPTYPE, typename Device>
30struct set_memory_op
31{
40 void operator()(FPTYPE* arr, const int var, const size_t size);
41};
42
43template <typename FPTYPE, typename Device>
44struct set_memory_2d_op
45{
56 void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height);
57};
58
59template <typename FPTYPE, typename Device_out, typename Device_in>
60struct synchronize_memory_op
61{
70 void operator()(FPTYPE* arr_out,
71 const FPTYPE* arr_in,
72 const size_t size);
73};
74
75template <typename FPTYPE, typename Device_out, typename Device_in>
76struct synchronize_memory_2d_op
77{
89 void operator()(FPTYPE* arr_out,
90 const size_t dpitch,
91 const FPTYPE* arr_in,
92 const size_t spitch,
93 const size_t width,
94 const size_t height);
95};
96
97template <typename FPTYPE_out, typename FPTYPE_in, typename Device_out, typename Device_in>
98struct cast_memory_op
99{
108 void operator()(FPTYPE_out* arr_out,
109 const FPTYPE_in* arr_in,
110 const size_t size);
111};
112
113template <typename FPTYPE, typename Device>
114struct delete_memory_op
115{
120 void operator()(FPTYPE* arr);
121};
122
123template <typename FPTYPE>
124void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
125
126template <typename FPTYPE>
127void set_memory(FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
128
129template <typename FPTYPE>
130void synchronize_memory(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in);
131
132template <typename FPTYPE_out, typename FPTYPE_in>
133void cast_memory(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in);
134
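135template <typename FPTYPE>
136void delete_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
137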
138#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
139// Partially specialize operator for base_device::GpuDevice.
140template <typename FPTYPE>
141struct resize_memory_op<FPTYPE, base_device::DEVICE_GPU>
142{
143 void operator()(FPTYPE*& arr,
144 const size_t size,
145 const char* record_in = nullptr);
146};
147
148template <typename FPTYPE>
149struct set_memory_op<FPTYPE, base_device::DEVICE_GPU>
150{
151 void operator()(FPTYPE* arr, const int var, const size_t size);
152};
153
154template <typename FPTYPE>
155struct set_memory_2d_op<FPTYPE, base_device::DEVICE_GPU>
156{
157 void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height);
158};
159
160template <typename FPTYPE>
161struct synchronize_memory_op<FPTYPE, base_device::DEVICE_CPU, base_device::DEVICE_GPU>
162{
163 void operator()(FPTYPE* arr_out,
164 const FPTYPE* arr_in,
165 const size_t size);
166};
167template <typename FPTYPE>
168struct synchronize_memory_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_CPU>
169{
170 void operator()(FPTYPE* arr_out,
171 const FPTYPE* arr_in,
172 const size_t size);
173};
174template <typename FPTYPE>
175struct synchronize_memory_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_GPU>
176{
177 void operator()(FPTYPE* arr_out,
178 const FPTYPE* arr_in,
179 const size_t size);
180
181};
182
183template <typename FPTYPE>
184struct synchronize_memory_2d_op<FPTYPE, base_device::DEVICE_CPU, base_device::DEVICE_GPU>
185{
186 void operator()(FPTYPE* arr_out,
187 const size_t dpitch,
188 const FPTYPE* arr_in,
189 const size_t spitch,
190 const size_t width,
191 const size_t height);
192};
193template <typename FPTYPE>
194struct synchronize_memory_2d_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_CPU>
195{
196 void operator()(FPTYPE* arr_out,
197 const size_t dpitch,
198 const FPTYPE* arr_in,
199 const size_t spitch,
200 const size_t width,
201 const size_t height);
202};
203template <typename FPTYPE>
204struct synchronize_memory_2d_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_GPU>
205{
206 void operator()(FPTYPE* arr_out,
207 const size_t dpitch,
208 const FPTYPE* arr_in,
209 const size_t spitch,
210 const size_t width,
211 const size_t height);
212};
213
214template <typename FPTYPE>
215struct delete_memory_op<FPTYPE, base_device::DEVICE_GPU>
216{
217 void operator()(FPTYPE* arr);
218};
219#endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
220
221#ifdef __DSP
222
223template <typename FPTYPE, typename Device>
224struct resize_memory_op_mt
225{
234 void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr);
235};
236
237template <typename FPTYPE, typename Device>
238struct delete_memory_op_mt
239{
244 void operator()(FPTYPE* arr);
245};
246
247#endif // __DSP
248
249} // end of namespace memory
250} // end of namespace base_device
251
256
261
266
271
276
281
286
291
304
306 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
308 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
310 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
312 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
314 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
316 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
317
319 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
321 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
323 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
325 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
327 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
329 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
330
332 = base_device::memory::synchronize_memory_2d_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
334 = base_device::memory::synchronize_memory_2d_op<std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
336 = base_device::memory::synchronize_memory_2d_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
338 = base_device::memory::synchronize_memory_2d_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
340 = base_device::memory::synchronize_memory_2d_op<std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
342 = base_device::memory::synchronize_memory_2d_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
343
356
357using castmem_c2z_h2h_op = base_device::memory::
358 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
359using castmem_c2z_h2d_op = base_device::memory::
360 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
361using castmem_c2z_d2h_op = base_device::memory::
362 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
363using castmem_z2c_h2h_op = base_device::memory::
364 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
365using castmem_z2c_h2d_op = base_device::memory::
366 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
367using castmem_z2c_d2h_op = base_device::memory::
368 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
369
370static base_device::DEVICE_CPU* cpu_ctx = {};
371static base_device::DEVICE_GPU* gpu_ctx = {};
372#endif // MODULE_DEVICE_MEMORY_H_
void cast_memory(FPTYPE_out *arr_out, const FPTYPE_in *arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in)
Definition memory_op.cpp:534
void synchronize_memory(FPTYPE *arr_out, const FPTYPE *arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in)
Definition memory_op.cpp:518
void resize_memory(FPTYPE *arr, const size_t size, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:497
void set_memory(FPTYPE *arr, const int var, const size_t size, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:508
void delete_memory(FPTYPE *arr, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:551
Definition device.cpp:21
AbacusDevice_t
Definition types.h:12
@ CpuDevice
Definition types.h:14
Definition memory_op.h:99
void operator()(FPTYPE_out *arr_out, const FPTYPE_in *arr_in, const size_t size)
memcpy for multi-device
Definition memory_op.h:115
void operator()(FPTYPE *arr)
free memory for multi-device
Definition memory_op.h:17
void operator()(FPTYPE *&arr, const size_t size, const char *record_in=nullptr)
Allocate memory for a given pointer. Note this op will free the pointer first.
Definition memory_op.h:45
void operator()(FPTYPE *arr, const size_t pitch, const int var, const size_t width, const size_t height)
memset2D for multi-device
Definition memory_op.h:31
void operator()(FPTYPE *arr, const int var, const size_t size)
memset for multi-device
void operator()(FPTYPE *arr_out, const size_t dpitch, const FPTYPE *arr_in, const size_t spitch, const size_t width, const size_t height)
memcpy2D for multi-device
void operator()(FPTYPE *arr_out, const FPTYPE *arr_in, const size_t size)
memcpy for multi-device