ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
code_gen.cpp File Reference

Functions

 gemm_time_measure< double, 2, 16, 16, 32, 2, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 2, 16, 16, 32, 4, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 2, 16, 16, 32, 6, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 2, 16, 16, 32, 8, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 2, 16, 16, 48, 2, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 2, 16, 16, 48, 4, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 2, 16, 16, 48, 6, 2, 16, 2, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 24, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 24, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 24, 12, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 32, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 32, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 40, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 40, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 48, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 56, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 8, 64, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 16, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 16, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 16, 12, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 24, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 24, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 32, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 32, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 40, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 48, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 16, 56, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 24, 16, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 24, 16, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 24, 24, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 24, 24, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 24, 32, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 24, 40, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 32, 16, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 32, 16, 8, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 32, 24, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 40, 16, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 40, 24, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 48, 16, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 8, 56, 16, 4, 4, 8, 4, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 32, 4, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 32, 8, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 32, 12, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 32, 16, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 48, 4, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 48, 8, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 48, 12, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 64, 4, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 16, 64, 8, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 32, 32, 4, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 32, 32, 8, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 32, 32, 12, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 32, 48, 4, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 32, 48, 8, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 48, 32, 4, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 16, 48, 32, 8, 4, 16, 4, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 24, 24, 48, 4, 4, 24, 4, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 24, 24, 48, 8, 4, 24, 4, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 24, 24, 48, 12, 4, 24, 4, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 24, 48, 48, 4, 4, 24, 4, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 24, 48, 48, 8, 4, 24, 4, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 32, 32, 64, 4, 4, 32, 4, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 32, 32, 64, 8, 4, 32, 4, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 32, 32, 64, 12, 4, 32, 4, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 4, 32, 32, 64, 16, 4, 32, 4, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 6, 16, 48, 32, 6, 6, 16, 6, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 6, 16, 48, 32, 12, 6, 16, 6, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 6, 16, 48, 48, 6, 6, 16, 6, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 16, 12, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 16, 16, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 16, 20, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 16, 24, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 16, 28, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 16, 32, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 24, 8, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 24, 12, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 24, 16, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 24, 20, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 24, 24, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 32, 8, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 32, 12, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 32, 16, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 4, 40, 8, 8, 8, 4, 8, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 24, 16, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 32, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 32, 16, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 40, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 48, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 56, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 16, 64, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 16, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 16, 16, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 24, 16, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 32, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 40, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 48, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 56, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 24, 64, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 16, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 16, 16, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 32, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 40, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 48, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 32, 56, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 40, 16, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 40, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 40, 32, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 40, 40, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 40, 48, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 48, 16, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 48, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 48, 32, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 48, 40, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 56, 16, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 56, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 56, 32, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 64, 16, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 8, 64, 24, 8, 8, 8, 8, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 24, 24, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 24, 24, 16, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 24, 36, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 24, 36, 16, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 24, 48, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 24, 60, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 48, 24, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 48, 36, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 48, 48, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 12, 48, 60, 8, 8, 12, 8, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 16, 48, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 16, 48, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 16, 48, 24, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 16, 64, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 16, 64, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 32, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 32, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 32, 24, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 48, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 48, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 64, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 32, 64, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 48, 32, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 48, 32, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 48, 48, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 48, 48, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 48, 64, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 64, 32, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 64, 32, 16, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 16, 64, 48, 8, 8, 16, 8, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 20, 40, 40, 8, 8, 20, 8, 20 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 20, 40, 40, 16, 8, 20, 8, 20 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 20, 40, 60, 8, 8, 20, 8, 20 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 24, 24, 48, 8, 8, 24, 8, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 24, 24, 48, 16, 8, 24, 8, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 24, 24, 48, 24, 8, 24, 8, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 24, 48, 48, 8, 8, 24, 8, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 24, 48, 48, 16, 8, 24, 8, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 28, 56, 56, 8, 8, 28, 8, 28 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 28, 56, 56, 16, 8, 28, 8, 28 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 32, 64, 8, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 32, 64, 16, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 32, 64, 24, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 32, 64, 32, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 64, 64, 8, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 64, 64, 16, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 8, 32, 64, 64, 24, 8, 32, 8, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 24, 24, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 24, 32, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 24, 40, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 24, 48, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 24, 56, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 48, 16, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 48, 24, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 8, 48, 32, 12, 12, 8, 12, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 16, 48, 32, 12, 12, 16, 12, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 16, 48, 32, 24, 12, 16, 12, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 16, 48, 48, 12, 12, 16, 12, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 16, 48, 64, 12, 12, 16, 12, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 24, 48, 48, 12, 12, 24, 12, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 12, 24, 48, 48, 24, 12, 24, 12, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 4, 32, 12, 16, 16, 4, 16, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 4, 32, 16, 16, 16, 4, 16, 4 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 6, 48, 12, 16, 16, 6, 16, 6 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 32, 24, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 32, 32, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 32, 40, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 32, 48, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 32, 56, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 32, 64, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 48, 16, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 48, 24, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 48, 32, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 48, 40, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 48, 48, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 64, 16, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 64, 24, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 8, 64, 32, 16, 16, 8, 16, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 12, 48, 24, 16, 16, 12, 16, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 12, 48, 36, 16, 16, 12, 16, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 12, 48, 48, 16, 16, 12, 16, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 12, 48, 60, 16, 16, 12, 16, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 32, 48, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 32, 48, 32, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 32, 64, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 32, 64, 32, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 48, 32, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 48, 32, 32, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 48, 48, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 48, 48, 32, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 48, 64, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 64, 32, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 64, 32, 32, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 64, 48, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 16, 64, 64, 16, 16, 16, 16, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 24, 48, 48, 16, 16, 24, 16, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 24, 48, 48, 32, 16, 24, 16, 24 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 32, 64, 64, 16, 16, 32, 16, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 16, 32, 64, 64, 32, 16, 32, 16, 32 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 20, 8, 40, 24, 20, 20, 8, 20, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 20, 8, 40, 32, 20, 20, 8, 20, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 8, 48, 24, 24, 24, 8, 24, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 8, 48, 32, 24, 24, 8, 24, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 12, 48, 36, 24, 24, 12, 24, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 12, 48, 48, 24, 24, 12, 24, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 12, 48, 60, 24, 24, 12, 24, 12 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 16, 48, 48, 24, 24, 16, 24, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 24, 16, 48, 64, 24, 24, 16, 24, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 32, 8, 64, 24, 32, 32, 8, 32, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 32, 8, 64, 32, 32, 32, 8, 32, 8 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 32, 16, 64, 48, 32, 32, 16, 32, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 
 gemm_time_measure< double, 32, 16, 64, 64, 32, 32, 16, 32, 16 > (max_m, max_n, d_m, d_n, d_k, d_global_A_array, d_global_lda, d_global_B_array, d_global_ldb, d_global_C_array, d_global_ldc, batchCount, temp_stream, fastest_time, fastest_algo, cpu_result, h_global_C, d_global_C)
 

Function Documentation

◆ gemm_time_measure< double, 12, 16, 48, 32, 12, 12, 16, 12, 16 >()

gemm_time_measure< double, 12, 16, 48, 32, 12, 12, 16, 12, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 16, 48, 32, 24, 12, 16, 12, 16 >()

gemm_time_measure< double, 12, 16, 48, 32, 24, 12, 16, 12, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 16, 48, 48, 12, 12, 16, 12, 16 >()

gemm_time_measure< double, 12, 16, 48, 48, 12, 12, 16, 12, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 16, 48, 64, 12, 12, 16, 12, 16 >()

gemm_time_measure< double, 12, 16, 48, 64, 12, 12, 16, 12, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 24, 48, 48, 12, 12, 24, 12, 24 >()

gemm_time_measure< double, 12, 24, 48, 48, 12, 12, 24, 12, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 24, 48, 48, 24, 12, 24, 12, 24 >()

gemm_time_measure< double, 12, 24, 48, 48, 24, 12, 24, 12, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 24, 24, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 24, 24, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 24, 32, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 24, 32, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 24, 40, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 24, 40, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 24, 48, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 24, 48, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 24, 56, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 24, 56, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 48, 16, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 48, 16, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 48, 24, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 48, 24, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 12, 8, 48, 32, 12, 12, 8, 12, 8 >()

gemm_time_measure< double, 12, 8, 48, 32, 12, 12, 8, 12, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 12, 48, 24, 16, 16, 12, 16, 12 >()

gemm_time_measure< double, 16, 12, 48, 24, 16, 16, 12, 16, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 12, 48, 36, 16, 16, 12, 16, 12 >()

gemm_time_measure< double, 16, 12, 48, 36, 16, 16, 12, 16, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 12, 48, 48, 16, 16, 12, 16, 12 >()

gemm_time_measure< double, 16, 12, 48, 48, 16, 16, 12, 16, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 12, 48, 60, 16, 16, 12, 16, 12 >()

gemm_time_measure< double, 16, 12, 48, 60, 16, 16, 12, 16, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 32, 48, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 32, 48, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 32, 48, 32, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 32, 48, 32, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 32, 64, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 32, 64, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 32, 64, 32, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 32, 64, 32, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 48, 32, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 48, 32, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 48, 32, 32, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 48, 32, 32, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 48, 48, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 48, 48, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 48, 48, 32, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 48, 48, 32, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 48, 64, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 48, 64, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 64, 32, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 64, 32, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 64, 32, 32, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 64, 32, 32, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 64, 48, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 64, 48, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 16, 64, 64, 16, 16, 16, 16, 16 >()

gemm_time_measure< double, 16, 16, 64, 64, 16, 16, 16, 16, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 24, 48, 48, 16, 16, 24, 16, 24 >()

gemm_time_measure< double, 16, 24, 48, 48, 16, 16, 24, 16, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 24, 48, 48, 32, 16, 24, 16, 24 >()

gemm_time_measure< double, 16, 24, 48, 48, 32, 16, 24, 16, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 32, 64, 64, 16, 16, 32, 16, 32 >()

gemm_time_measure< double, 16, 32, 64, 64, 16, 16, 32, 16, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 32, 64, 64, 32, 16, 32, 16, 32 >()

gemm_time_measure< double, 16, 32, 64, 64, 32, 16, 32, 16, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 4, 32, 12, 16, 16, 4, 16, 4 >()

gemm_time_measure< double, 16, 4, 32, 12, 16, 16, 4, 16, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 4, 32, 16, 16, 16, 4, 16, 4 >()

gemm_time_measure< double, 16, 4, 32, 16, 16, 16, 4, 16, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 6, 48, 12, 16, 16, 6, 16, 6 >()

gemm_time_measure< double, 16, 6, 48, 12, 16, 16, 6, 16, 6 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 32, 24, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 32, 24, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 32, 32, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 32, 32, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 32, 40, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 32, 40, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 32, 48, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 32, 48, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 32, 56, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 32, 56, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 32, 64, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 32, 64, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 48, 16, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 48, 16, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 48, 24, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 48, 24, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 48, 32, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 48, 32, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 48, 40, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 48, 40, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 48, 48, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 48, 48, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 64, 16, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 64, 16, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 64, 24, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 64, 24, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 16, 8, 64, 32, 16, 16, 8, 16, 8 >()

gemm_time_measure< double, 16, 8, 64, 32, 16, 16, 8, 16, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 32, 2, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 32, 2, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 32, 4, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 32, 4, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 32, 6, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 32, 6, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 32, 8, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 32, 8, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 48, 2, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 48, 2, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 48, 4, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 48, 4, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 2, 16, 16, 48, 6, 2, 16, 2, 16 >()

gemm_time_measure< double, 2, 16, 16, 48, 6, 2, 16, 2, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 20, 8, 40, 24, 20, 20, 8, 20, 8 >()

gemm_time_measure< double, 20, 8, 40, 24, 20, 20, 8, 20, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 20, 8, 40, 32, 20, 20, 8, 20, 8 >()

gemm_time_measure< double, 20, 8, 40, 32, 20, 20, 8, 20, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 12, 48, 36, 24, 24, 12, 24, 12 >()

gemm_time_measure< double, 24, 12, 48, 36, 24, 24, 12, 24, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 12, 48, 48, 24, 24, 12, 24, 12 >()

gemm_time_measure< double, 24, 12, 48, 48, 24, 24, 12, 24, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 12, 48, 60, 24, 24, 12, 24, 12 >()

gemm_time_measure< double, 24, 12, 48, 60, 24, 24, 12, 24, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 16, 48, 48, 24, 24, 16, 24, 16 >()

gemm_time_measure< double, 24, 16, 48, 48, 24, 24, 16, 24, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 16, 48, 64, 24, 24, 16, 24, 16 >()

gemm_time_measure< double, 24, 16, 48, 64, 24, 24, 16, 24, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 8, 48, 24, 24, 24, 8, 24, 8 >()

gemm_time_measure< double, 24, 8, 48, 24, 24, 24, 8, 24, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 24, 8, 48, 32, 24, 24, 8, 24, 8 >()

gemm_time_measure< double, 24, 8, 48, 32, 24, 24, 8, 24, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 32, 16, 64, 48, 32, 32, 16, 32, 16 >()

gemm_time_measure< double, 32, 16, 64, 48, 32, 32, 16, 32, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 32, 16, 64, 64, 32, 32, 16, 32, 16 >()

gemm_time_measure< double, 32, 16, 64, 64, 32, 32, 16, 32, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 32, 8, 64, 24, 32, 32, 8, 32, 8 >()

gemm_time_measure< double, 32, 8, 64, 24, 32, 32, 8, 32, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 32, 8, 64, 32, 32, 32, 8, 32, 8 >()

gemm_time_measure< double, 32, 8, 64, 32, 32, 32, 8, 32, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 32, 12, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 32, 12, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 32, 16, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 32, 16, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 32, 4, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 32, 4, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 32, 8, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 32, 8, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 48, 12, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 48, 12, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 48, 4, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 48, 4, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 48, 8, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 48, 8, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 64, 4, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 64, 4, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 16, 64, 8, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 16, 64, 8, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 32, 32, 12, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 32, 32, 12, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 32, 32, 4, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 32, 32, 4, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 32, 32, 8, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 32, 32, 8, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 32, 48, 4, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 32, 48, 4, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 32, 48, 8, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 32, 48, 8, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 48, 32, 4, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 48, 32, 4, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 16, 48, 32, 8, 4, 16, 4, 16 >()

gemm_time_measure< double, 4, 16, 48, 32, 8, 4, 16, 4, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 24, 24, 48, 12, 4, 24, 4, 24 >()

gemm_time_measure< double, 4, 24, 24, 48, 12, 4, 24, 4, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 24, 24, 48, 4, 4, 24, 4, 24 >()

gemm_time_measure< double, 4, 24, 24, 48, 4, 4, 24, 4, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 24, 24, 48, 8, 4, 24, 4, 24 >()

gemm_time_measure< double, 4, 24, 24, 48, 8, 4, 24, 4, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 24, 48, 48, 4, 4, 24, 4, 24 >()

gemm_time_measure< double, 4, 24, 48, 48, 4, 4, 24, 4, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 24, 48, 48, 8, 4, 24, 4, 24 >()

gemm_time_measure< double, 4, 24, 48, 48, 8, 4, 24, 4, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 32, 32, 64, 12, 4, 32, 4, 32 >()

gemm_time_measure< double, 4, 32, 32, 64, 12, 4, 32, 4, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 32, 32, 64, 16, 4, 32, 4, 32 >()

gemm_time_measure< double, 4, 32, 32, 64, 16, 4, 32, 4, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 32, 32, 64, 4, 4, 32, 4, 32 >()

gemm_time_measure< double, 4, 32, 32, 64, 4, 4, 32, 4, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 32, 32, 64, 8, 4, 32, 4, 32 >()

gemm_time_measure< double, 4, 32, 32, 64, 8, 4, 32, 4, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 16, 12, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 16, 12, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 16, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 16, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 16, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 16, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 24, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 24, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 24, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 24, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 32, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 32, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 32, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 32, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 40, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 40, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 48, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 48, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 16, 56, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 16, 56, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 24, 16, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 24, 16, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 24, 16, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 24, 16, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 24, 24, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 24, 24, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 24, 24, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 24, 24, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 24, 32, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 24, 32, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 24, 40, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 24, 40, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 32, 16, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 32, 16, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 32, 16, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 32, 16, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 32, 24, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 32, 24, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 40, 16, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 40, 16, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 40, 24, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 40, 24, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 48, 16, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 48, 16, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 56, 16, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 56, 16, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 24, 12, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 24, 12, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 24, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 24, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 24, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 24, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 32, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 32, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 32, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 32, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 40, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 40, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 40, 8, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 40, 8, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 48, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 48, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 56, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 56, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 4, 8, 8, 64, 4, 4, 8, 4, 8 >()

gemm_time_measure< double, 4, 8, 8, 64, 4, 4, 8, 4, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 6, 16, 48, 32, 12, 6, 16, 6, 16 >()

gemm_time_measure< double, 6, 16, 48, 32, 12, 6, 16, 6, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 6, 16, 48, 32, 6, 6, 16, 6, 16 >()

gemm_time_measure< double, 6, 16, 48, 32, 6, 6, 16, 6, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 6, 16, 48, 48, 6, 6, 16, 6, 16 >()

gemm_time_measure< double, 6, 16, 48, 48, 6, 6, 16, 6, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 24, 24, 16, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 24, 24, 16, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 24, 24, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 24, 24, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 24, 36, 16, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 24, 36, 16, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 24, 36, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 24, 36, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 24, 48, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 24, 48, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 24, 60, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 24, 60, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 48, 24, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 48, 24, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 48, 36, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 48, 36, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 48, 48, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 48, 48, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 12, 48, 60, 8, 8, 12, 8, 12 >()

gemm_time_measure< double, 8, 12, 48, 60, 8, 8, 12, 8, 12 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 16, 48, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 16, 48, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 16, 48, 24, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 16, 48, 24, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 16, 48, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 16, 48, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 16, 64, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 16, 64, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 16, 64, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 16, 64, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 32, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 32, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 32, 24, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 32, 24, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 32, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 32, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 48, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 48, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 48, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 48, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 64, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 64, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 32, 64, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 32, 64, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 48, 32, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 48, 32, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 48, 32, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 48, 32, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 48, 48, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 48, 48, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 48, 48, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 48, 48, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 48, 64, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 48, 64, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 64, 32, 16, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 64, 32, 16, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 64, 32, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 64, 32, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 16, 64, 48, 8, 8, 16, 8, 16 >()

gemm_time_measure< double, 8, 16, 64, 48, 8, 8, 16, 8, 16 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 20, 40, 40, 16, 8, 20, 8, 20 >()

gemm_time_measure< double, 8, 20, 40, 40, 16, 8, 20, 8, 20 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 20, 40, 40, 8, 8, 20, 8, 20 >()

gemm_time_measure< double, 8, 20, 40, 40, 8, 8, 20, 8, 20 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 20, 40, 60, 8, 8, 20, 8, 20 >()

gemm_time_measure< double, 8, 20, 40, 60, 8, 8, 20, 8, 20 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 24, 24, 48, 16, 8, 24, 8, 24 >()

gemm_time_measure< double, 8, 24, 24, 48, 16, 8, 24, 8, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 24, 24, 48, 24, 8, 24, 8, 24 >()

gemm_time_measure< double, 8, 24, 24, 48, 24, 8, 24, 8, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 24, 24, 48, 8, 8, 24, 8, 24 >()

gemm_time_measure< double, 8, 24, 24, 48, 8, 8, 24, 8, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 24, 48, 48, 16, 8, 24, 8, 24 >()

gemm_time_measure< double, 8, 24, 48, 48, 16, 8, 24, 8, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 24, 48, 48, 8, 8, 24, 8, 24 >()

gemm_time_measure< double, 8, 24, 48, 48, 8, 8, 24, 8, 24 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 28, 56, 56, 16, 8, 28, 8, 28 >()

gemm_time_measure< double, 8, 28, 56, 56, 16, 8, 28, 8, 28 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 28, 56, 56, 8, 8, 28, 8, 28 >()

gemm_time_measure< double, 8, 28, 56, 56, 8, 8, 28, 8, 28 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 32, 64, 16, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 32, 64, 16, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 32, 64, 24, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 32, 64, 24, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 32, 64, 32, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 32, 64, 32, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 32, 64, 8, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 32, 64, 8, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 64, 64, 16, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 64, 64, 16, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 64, 64, 24, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 64, 64, 24, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 32, 64, 64, 8, 8, 32, 8, 32 >()

gemm_time_measure< double, 8, 32, 64, 64, 8, 8, 32, 8, 32 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 16, 12, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 16, 12, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 16, 16, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 16, 16, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 16, 20, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 16, 20, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 16, 24, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 16, 24, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 16, 28, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 16, 28, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 16, 32, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 16, 32, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 24, 12, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 24, 12, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 24, 16, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 24, 16, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 24, 20, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 24, 20, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 24, 24, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 24, 24, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 24, 8, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 24, 8, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 32, 12, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 32, 12, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 32, 16, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 32, 16, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 32, 8, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 32, 8, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 4, 40, 8, 8, 8, 4, 8, 4 >()

gemm_time_measure< double, 8, 4, 40, 8, 8, 8, 4, 8, 4 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 24, 16, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 24, 16, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 32, 16, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 32, 16, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 32, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 32, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 40, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 40, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 48, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 48, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 56, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 56, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 16, 64, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 16, 64, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 16, 16, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 16, 16, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 16, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 16, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 24, 16, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 24, 16, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 32, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 32, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 40, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 40, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 48, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 48, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 56, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 56, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 24, 64, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 24, 64, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 16, 16, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 16, 16, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 16, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 16, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 32, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 32, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 40, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 40, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 48, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 48, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 32, 56, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 32, 56, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 40, 16, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 40, 16, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 40, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 40, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 40, 32, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 40, 32, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 40, 40, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 40, 40, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 40, 48, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 40, 48, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 48, 16, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 48, 16, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 48, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 48, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 48, 32, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 48, 32, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 48, 40, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 48, 40, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 56, 16, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 56, 16, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 56, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 56, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 56, 32, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 56, 32, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 64, 16, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 64, 16, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)

◆ gemm_time_measure< double, 8, 8, 64, 24, 8, 8, 8, 8, 8 >()

gemm_time_measure< double, 8, 8, 64, 24, 8, 8, 8, 8, 8 > ( max_m  ,
max_n  ,
d_m  ,
d_n  ,
d_k  ,
d_global_A_array  ,
d_global_lda  ,
d_global_B_array  ,
d_global_ldb  ,
d_global_C_array  ,
d_global_ldc  ,
batchCount  ,
temp_stream  ,
fastest_time  ,
fastest_algo  ,
cpu_result  ,
h_global_C  ,
d_global_C   
)