blockM | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
blockN | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
has_mul_operand_mask | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
has_mul_output_mask | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
has_operand_mask | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
has_output_mask | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
load_safe(const device T *src, thread U dst[TN], const int src_offset=0, const int src_size=TN) | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | inline static |
load_unsafe(const device T *src, thread U dst[TN], const int src_offset=0) | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | inline static |
needs_tgp_reduction | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
run(const device T *mat, const device T *in_vec, device T *out_vec, const constant int &in_vec_size, const constant int &out_vec_size, const constant int &matrix_ld, const device out_mask_t *out_mask, const device op_mask_t *mat_mask, const device op_mask_t *vec_mask, const constant int *mask_strides, threadgroup AccT *tgp_memory, uint3 tid, uint3 lid, uint simd_gid, uint simd_lid) | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | inline static |
tgp_mem_size | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
threadsM | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |
threadsN | GEMVKernel< T, out_mask_t, op_mask_t, BM, BN, SM, SN, TM, TN, AccT > | static |