ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
memory_op.h
Go to the documentation of this file.
1#ifndef MODULE_DEVICE_MEMORY_H_
2#define MODULE_DEVICE_MEMORY_H_
3
4#include "types.h"
5
6#include <complex>
7#include <cstddef>
8
9namespace base_device
10{
11
12namespace memory
13{
14
/**
 * @brief Allocate memory for a given pointer. Note this op will free the pointer first.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type; specializations select the device-specific implementation.
 */
template <typename FPTYPE, typename Device>
struct resize_memory_op
{
    /**
     * @param arr       Pointer to (re)allocate; taken by reference.
     * @param size      Requested size (presumably a count of FPTYPE elements -- confirm against the definition).
     * @param record_in Optional C string, nullptr by default (presumably a label for memory bookkeeping -- confirm).
     */
    void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr);
};
28
/**
 * @brief memset for multi-device.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type; specializations select the device-specific implementation.
 */
template <typename FPTYPE, typename Device>
struct set_memory_op
{
    /**
     * @param arr  Array to fill.
     * @param var  Fill value, passed as int.
     * @param size Extent to fill (units not visible here -- confirm against the definition).
     */
    void operator()(FPTYPE* arr, const int var, const size_t size);
};
42
/**
 * @brief memset2D for multi-device.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type; specializations select the device-specific implementation.
 */
template <typename FPTYPE, typename Device>
struct set_memory_2d_op
{
    /**
     * @param arr    Array to fill.
     * @param pitch  Row pitch of arr (units not visible here -- confirm against the definition).
     * @param var    Fill value, passed as int.
     * @param width  Width of the region to fill.
     * @param height Height (row count) of the region to fill.
     */
    void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height);
};
58
/**
 * @brief memcpy for multi-device.
 *
 * @tparam FPTYPE     Element type of both arrays.
 * @tparam Device_out Device tag type of the destination.
 * @tparam Device_in  Device tag type of the source.
 */
template <typename FPTYPE, typename Device_out, typename Device_in>
struct synchronize_memory_op
{
    /**
     * @param arr_out Destination array.
     * @param arr_in  Source array (read-only).
     * @param size    Extent to copy (units not visible here -- confirm against the definition).
     */
    void operator()(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size);
};
74
/**
 * @brief memcpy2D for multi-device.
 *
 * @tparam FPTYPE     Element type of both arrays.
 * @tparam Device_out Device tag type of the destination.
 * @tparam Device_in  Device tag type of the source.
 */
template <typename FPTYPE, typename Device_out, typename Device_in>
struct synchronize_memory_2d_op
{
    /**
     * @param arr_out Destination array.
     * @param dpitch  Row pitch of the destination (units not visible here -- confirm against the definition).
     * @param arr_in  Source array (read-only).
     * @param spitch  Row pitch of the source.
     * @param width   Width of the region to copy.
     * @param height  Height (row count) of the region to copy.
     */
    void operator()(FPTYPE* arr_out,
                    const size_t dpitch,
                    const FPTYPE* arr_in,
                    const size_t spitch,
                    const size_t width,
                    const size_t height);
};
96
/**
 * @brief memcpy for multi-device, with distinct element types for destination and source.
 *
 * @tparam FPTYPE_out Element type of the destination array.
 * @tparam FPTYPE_in  Element type of the source array.
 * @tparam Device_out Device tag type of the destination.
 * @tparam Device_in  Device tag type of the source.
 */
template <typename FPTYPE_out, typename FPTYPE_in, typename Device_out, typename Device_in>
struct cast_memory_op
{
    /**
     * @param arr_out Destination array.
     * @param arr_in  Source array (read-only).
     * @param size    Extent to process (units not visible here -- confirm against the definition).
     */
    void operator()(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size);
};
112
/**
 * @brief free memory for multi-device.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type; specializations select the device-specific implementation.
 */
template <typename FPTYPE, typename Device>
struct delete_memory_op
{
    /**
     * @param arr Pointer to release. It is passed by value, so the caller's pointer
     *            variable is not modified by this call.
     */
    void operator()(FPTYPE* arr);
};
122
123template <typename FPTYPE>
124void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
125
126template <typename FPTYPE>
127void set_memory(FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
128
129template <typename FPTYPE>
130void synchronize_memory(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in);
131
132template <typename FPTYPE_out, typename FPTYPE_in>
133void cast_memory(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in);
134
135template <typename FPTYPE>
137
138#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
139// Partially specialize operator for base_device::GpuDevice.
140template <typename FPTYPE>
141struct resize_memory_op<FPTYPE, base_device::DEVICE_GPU>
142{
143 void operator()(FPTYPE*& arr,
144 const size_t size,
145 const char* record_in = nullptr);
146};
147
148template <typename FPTYPE>
149struct set_memory_op<FPTYPE, base_device::DEVICE_GPU>
150{
151 void operator()(FPTYPE* arr, const int var, const size_t size);
152};
153
154template <typename FPTYPE>
155struct set_memory_2d_op<FPTYPE, base_device::DEVICE_GPU>
156{
157 void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height);
158};
159
160template <typename FPTYPE>
161struct synchronize_memory_op<FPTYPE, base_device::DEVICE_CPU, base_device::DEVICE_GPU>
162{
163 void operator()(FPTYPE* arr_out,
164 const FPTYPE* arr_in,
165 const size_t size);
166};
167template <typename FPTYPE>
168struct synchronize_memory_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_CPU>
169{
170 void operator()(FPTYPE* arr_out,
171 const FPTYPE* arr_in,
172 const size_t size);
173};
174template <typename FPTYPE>
175struct synchronize_memory_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_GPU>
176{
177 void operator()(FPTYPE* arr_out,
178 const FPTYPE* arr_in,
179 const size_t size);
180
181};
182
183template <typename FPTYPE>
184struct synchronize_memory_2d_op<FPTYPE, base_device::DEVICE_CPU, base_device::DEVICE_GPU>
185{
186 void operator()(FPTYPE* arr_out,
187 const size_t dpitch,
188 const FPTYPE* arr_in,
189 const size_t spitch,
190 const size_t width,
191 const size_t height);
192};
193template <typename FPTYPE>
194struct synchronize_memory_2d_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_CPU>
195{
196 void operator()(FPTYPE* arr_out,
197 const size_t dpitch,
198 const FPTYPE* arr_in,
199 const size_t spitch,
200 const size_t width,
201 const size_t height);
202};
203template <typename FPTYPE>
204struct synchronize_memory_2d_op<FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_GPU>
205{
206 void operator()(FPTYPE* arr_out,
207 const size_t dpitch,
208 const FPTYPE* arr_in,
209 const size_t spitch,
210 const size_t width,
211 const size_t height);
212};
213
214template <typename FPTYPE>
215struct delete_memory_op<FPTYPE, base_device::DEVICE_GPU>
216{
217 void operator()(FPTYPE* arr);
218};
219#endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
220
221#ifdef __DSP
222
/**
 * @brief Set the DSP cluster id (declared only when __DSP is defined).
 *
 * @param id Cluster id to store; valid range is not visible here.
 */
void set_dsp_cluster_id(int id);
227
/**
 * @brief Get the DSP cluster id (declared only when __DSP is defined).
 *
 * @return The cluster id as an int (presumably the value last given to
 *         set_dsp_cluster_id -- confirm against the definition).
 */
int get_dsp_cluster_id();
230
/**
 * @brief Allocation functor declared only when __DSP is defined; its call signature
 *        matches resize_memory_op.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type.
 */
template <typename FPTYPE, typename Device>
struct resize_memory_op_mt
{
    /**
     * @param arr       Pointer to (re)allocate; taken by reference.
     * @param size      Requested size (units not visible here -- confirm against the definition).
     * @param record_in Optional C string, nullptr by default.
     */
    void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr);
};
244
/**
 * @brief Fill functor declared only when __DSP is defined; its call signature
 *        matches set_memory_op.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type.
 */
template <typename FPTYPE, typename Device>
struct set_memory_op_mt
{
    /**
     * @param arr  Array to fill.
     * @param var  Fill value, passed as int.
     * @param size Extent to fill (units not visible here -- confirm against the definition).
     */
    void operator()(FPTYPE* arr, const int var, const size_t size);
};
258
/**
 * @brief Release functor declared only when __DSP is defined; its call signature
 *        matches delete_memory_op.
 *
 * @tparam FPTYPE Element type of the array.
 * @tparam Device Device tag type.
 */
template <typename FPTYPE, typename Device>
struct delete_memory_op_mt
{
    /** @param arr Pointer to release; passed by value. */
    void operator()(FPTYPE* arr);
};
268
269#endif // __DSP
270
271} // end of namespace memory
272} // end of namespace base_device
273
278
283
288
293
298
303
308
313
326
328 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
330 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
332 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
334 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
336 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
338 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
339
341 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
343 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
345 = base_device::memory::synchronize_memory_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
347 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
349 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
351 = base_device::memory::synchronize_memory_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
352
354 = base_device::memory::synchronize_memory_2d_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
356 = base_device::memory::synchronize_memory_2d_op<std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
358 = base_device::memory::synchronize_memory_2d_op<std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
360 = base_device::memory::synchronize_memory_2d_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
362 = base_device::memory::synchronize_memory_2d_op<std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
364 = base_device::memory::synchronize_memory_2d_op<std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
365
378
379using castmem_c2z_h2h_op = base_device::memory::
380 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
381using castmem_c2z_h2d_op = base_device::memory::
382 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
383using castmem_c2z_d2h_op = base_device::memory::
384 cast_memory_op<std::complex<double>, std::complex<float>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
385using castmem_z2c_h2h_op = base_device::memory::
386 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
387using castmem_z2c_h2d_op = base_device::memory::
388 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_GPU, base_device::DEVICE_CPU>;
389using castmem_z2c_d2h_op = base_device::memory::
390 cast_memory_op<std::complex<float>, std::complex<double>, base_device::DEVICE_CPU, base_device::DEVICE_GPU>;
391
392static base_device::DEVICE_CPU* cpu_ctx = {};
393static base_device::DEVICE_GPU* gpu_ctx = {};
394#endif // MODULE_DEVICE_MEMORY_H_
void cast_memory(FPTYPE_out *arr_out, const FPTYPE_in *arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in)
Definition memory_op.cpp:567
void synchronize_memory(FPTYPE *arr_out, const FPTYPE *arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in)
Definition memory_op.cpp:551
void resize_memory(FPTYPE *arr, const size_t size, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:530
void set_memory(FPTYPE *arr, const int var, const size_t size, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:541
void delete_memory(FPTYPE *arr, base_device::AbacusDevice_t device_type)
Definition memory_op.cpp:584
Definition device.cpp:21
AbacusDevice_t
Definition types.h:12
@ CpuDevice
Definition types.h:14
Definition memory_op.h:99
void operator()(FPTYPE_out *arr_out, const FPTYPE_in *arr_in, const size_t size)
memcpy for multi-device
Definition memory_op.h:115
void operator()(FPTYPE *arr)
free memory for multi-device
Definition memory_op.h:17
void operator()(FPTYPE *&arr, const size_t size, const char *record_in=nullptr)
Allocate memory for a given pointer. Note this op will free the pointer first.
Definition memory_op.h:45
void operator()(FPTYPE *arr, const size_t pitch, const int var, const size_t width, const size_t height)
memset2D for multi-device
Definition memory_op.h:31
void operator()(FPTYPE *arr, const int var, const size_t size)
memset for multi-device
void operator()(FPTYPE *arr_out, const size_t dpitch, const FPTYPE *arr_in, const size_t spitch, const size_t width, const size_t height)
memcpy2D for multi-device
void operator()(FPTYPE *arr_out, const FPTYPE *arr_in, const size_t size)
memcpy for multi-device