#include <cuda_runtime.h>
int max_m, int max_n, int max_k,
const int* m_d, const int* n_d, const int* k_d,
const double* const* A_array_d, const int* lda_d,
const double* const* B_array_d, const int* ldb_d,
double** C_array_d, const int* ldc_d,
int batchCount, cudaStream_t stream,
const double* alpha = nullptr);
int max_m, int max_n, int max_k,
const int* m_d, const int* n_d, const int* k_d,
const double* const* A_array_d, const int* lda_d,
const double* const* B_array_d, const int* ldb_d,
double** C_array_d, const int* ldc_d,
int batchCount, cudaStream_t stream,
const double* alpha = nullptr);
/**
 * Prototype of dgemm_tn_vbatch (declaration only; the definition is not
 * visible from here).
 *
 * NOTE(review): judging by the name, this is presumably a variable-size
 * batched double-precision matrix multiply with A transposed and B not
 * transposed -- confirm against the definition before relying on it.
 *
 * @param max_m,max_n,max_k  Ints passed by value; presumably upper bounds on
 *                           the per-problem m/n/k -- TODO confirm.
 * @param m_d,n_d,k_d        Read-only int arrays. The `_d` suffix suggests
 *                           device memory; presumably one entry per batch
 *                           item -- TODO confirm.
 * @param A_array_d          Read-only array of read-only double pointers
 *                           (presumably one A matrix per batch item).
 * @param lda_d              Read-only int array (presumably the leading
 *                           dimension of each A) -- TODO confirm.
 * @param B_array_d          Read-only array of read-only double pointers
 *                           (presumably one B matrix per batch item).
 * @param ldb_d              Read-only int array (presumably the leading
 *                           dimension of each B) -- TODO confirm.
 * @param C_array_d          Array of writable double pointers (presumably
 *                           the per-item outputs).
 * @param ldc_d              Read-only int array (presumably the leading
 *                           dimension of each C) -- TODO confirm.
 * @param batchCount         Int passed by value; presumably the number of
 *                           problems in the batch.
 * @param stream             CUDA stream handle; presumably the stream the
 *                           work is issued on -- TODO confirm.
 * @param alpha              Optional pointer to a double, defaults to
 *                           nullptr. What a null value means is decided by
 *                           the implementation, which is not shown here.
 */
void dgemm_tn_vbatch(int max_m, int max_n, int max_k,
                     const int* m_d, const int* n_d, const int* k_d,
                     const double* const* A_array_d, const int* lda_d,
                     const double* const* B_array_d, const int* ldb_d,
                     double** C_array_d, const int* ldc_d,
                     int batchCount, cudaStream_t stream,
                     const double* alpha = nullptr);
/**
 * Prototype of dgemm_nn_vbatch (declaration only; the definition is not
 * visible from here).
 *
 * NOTE(review): judging by the name, this is presumably a variable-size
 * batched double-precision matrix multiply with neither A nor B
 * transposed -- confirm against the definition before relying on it.
 *
 * @param max_m,max_n,max_k  Ints passed by value; presumably upper bounds on
 *                           the per-problem m/n/k -- TODO confirm.
 * @param m_d,n_d,k_d        Read-only int arrays. The `_d` suffix suggests
 *                           device memory; presumably one entry per batch
 *                           item -- TODO confirm.
 * @param A_array_d          Read-only array of read-only double pointers
 *                           (presumably one A matrix per batch item).
 * @param lda_d              Read-only int array (presumably the leading
 *                           dimension of each A) -- TODO confirm.
 * @param B_array_d          Read-only array of read-only double pointers
 *                           (presumably one B matrix per batch item).
 * @param ldb_d              Read-only int array (presumably the leading
 *                           dimension of each B) -- TODO confirm.
 * @param C_array_d          Array of writable double pointers (presumably
 *                           the per-item outputs).
 * @param ldc_d              Read-only int array (presumably the leading
 *                           dimension of each C) -- TODO confirm.
 * @param batchCount         Int passed by value; presumably the number of
 *                           problems in the batch.
 * @param stream             CUDA stream handle; presumably the stream the
 *                           work is issued on -- TODO confirm.
 * @param alpha              Optional pointer to a double, defaults to
 *                           nullptr. What a null value means is decided by
 *                           the implementation, which is not shown here.
 */
void dgemm_nn_vbatch(int max_m, int max_n, int max_k,
                     const int* m_d, const int* n_d, const int* k_d,
                     const double* const* A_array_d, const int* lda_d,
                     const double* const* B_array_d, const int* ldb_d,
                     double** C_array_d, const int* ldc_d,
                     int batchCount, cudaStream_t stream,
                     const double* alpha = nullptr);