14 const std::string& kernel_name,
19 const std::string& kernel_name,
22 const std::string op);
26 const std::string& kernel_name,
29 const std::string op);
33 const std::string& kernel_name,
36 const std::string op);
40 const std::string& kernel_name,
42 const std::string op);
46 const std::string& kernel_name,
52 const std::string& kernel_name,
58 const std::string& kernel_name,
64 const std::string& kernel_name,
67 const std::string& reduce_type,
73 const std::string& kernel_name,
81 const std::string& kernel_name,
89 const std::string& kernel_name,
90 const std::string& func_name,
91 const std::string& op_name,
92 const Dtype& out_type);
96 const std::string& kernel_name,
97 const std::string& func_name,
98 const std::string& op_name,
100 const Dtype& out_type,
101 const std::string& idx_t,
108 const std::string& kernel_name,
109 const std::string& hash_name,
122 const std::string& kernel_name,
137 const std::string& kernel_name,
144 const std::string& kernel_name,
146 const std::optional<array>& mask_out,
147 const std::optional<array>& mask_op,
160 const std::string& kernel_name,
167 int n_channel_specialization,
172 const std::string& kernel_name,
174 const std::optional<array>& mask_out,
175 const std::optional<array>& mask_op,
187 const std::string& kernel_name,
197 const std::string& kernel_name,
198 const std::string& hash_name,
200 const std::string& template_def);
204 const std::string& kernel_name,
205 const std::string& template_def);
208template <
typename...
Args>
211 std::ostringstream s;
214 auto add_arg = [&s, &first](
const auto& arg) {
221 (add_arg(args), ...);
224 "\ntemplate [[host_name(\"{0}\")]] [[kernel]] decltype({1}) {1};\n",
array contiguous(const array &a, bool allow_col_major=false, StreamOrDevice s={})
MTL::ComputePipelineState * get_copy_kernel(metal::Device &d, const std::string &kernel_name, const array &in, const array &out)
MTL::ComputePipelineState * get_steel_gemm_splitk_accum_kernel(metal::Device &d, const std::string &kernel_name, const array &in, const array &out, bool axbpy)
MTL::ComputePipelineState * get_reduce_kernel(metal::Device &d, const std::string &kernel_name, const std::string &func_name, const std::string &op_name, const Dtype &in_type, const Dtype &out_type, const std::string &idx_t, int ndim=-1, int bm=-1, int bn=-1)
MTL::ComputePipelineState * get_fft_kernel(metal::Device &d, const std::string &kernel_name, const std::string &hash_name, const metal::MTLFCList &func_consts, const std::string &template_def)
MTL::ComputePipelineState * get_softmax_kernel(metal::Device &d, const std::string &kernel_name, bool precise, const array &out)
MTL::ComputePipelineState * get_binary_kernel(metal::Device &d, const std::string &kernel_name, Dtype in_type, Dtype out_type, const std::string op)
MTL::ComputePipelineState * get_binary_two_kernel(metal::Device &d, const std::string &kernel_name, Dtype in_type, Dtype out_type, const std::string op)
MTL::ComputePipelineState * get_ternary_kernel(metal::Device &d, const std::string &kernel_name, Dtype type, const std::string op)
MTL::ComputePipelineState * get_arange_kernel(metal::Device &d, const std::string &kernel_name, const array &out)
MTL::ComputePipelineState * get_sort_kernel(metal::Device &d, const std::string &kernel_name, const array &in, const array &out, int bn, int tn)
MTL::ComputePipelineState * get_steel_gemm_fused_kernel(metal::Device &d, const std::string &kernel_name, const std::string &hash_name, const metal::MTLFCList &func_consts, const array &out, bool transpose_a, bool transpose_b, int bm, int bn, int bk, int wm, int wn)
MTL::ComputePipelineState * get_gemv_masked_kernel(metal::Device &d, const std::string &kernel_name, const array &out, const std::optional< array > &mask_out, const std::optional< array > &mask_op, bool transpose_mat, int bm, int bn, int sm, int sn, int tm, int tn, bool contiguous)
MTL::ComputePipelineState * get_quantized_kernel(metal::Device &d, const std::string &kernel_name, const std::string &template_def)
std::string get_template_definition(std::string name, std::string func, Args... args)
Definition kernels.h:210
MTL::ComputePipelineState * get_steel_gemm_masked_kernel(metal::Device &d, const std::string &kernel_name, const array &out, const std::optional< array > &mask_out, const std::optional< array > &mask_op, bool transpose_a, bool transpose_b, int bm, int bn, int bk, int wm, int wn, bool mn_aligned, bool k_aligned)
MTL::ComputePipelineState * get_steel_conv_general_kernel(metal::Device &d, const std::string &kernel_name, const array &out, int bm, int bn, int bk, int wm, int wn)
std::vector< array > Args
Definition export.h:11
MTL::ComputePipelineState * get_steel_conv_kernel(metal::Device &d, const std::string &kernel_name, const array &out, int bm, int bn, int bk, int wm, int wn, int n_channel_specialization, bool small_filter)
MTL::ComputePipelineState * get_dynamic_copy_kernel(metal::Device &d, const std::string &kernel_name, const array &in, const array &out)
MTL::ComputePipelineState * get_reduce_init_kernel(metal::Device &d, const std::string &kernel_name, const std::string &func_name, const std::string &op_name, const Dtype &out_type)
MTL::ComputePipelineState * get_scan_kernel(metal::Device &d, const std::string &kernel_name, bool reverse, bool inclusive, const std::string &reduce_type, const array &in, const array &out)
MTL::ComputePipelineState * get_steel_gemm_splitk_kernel(metal::Device &d, const std::string &kernel_name, const array &in, const array &out, bool transpose_a, bool transpose_b, int bm, int bn, int bk, int wm, int wn, bool mn_aligned, bool k_aligned)
MTL::ComputePipelineState * get_mb_sort_kernel(metal::Device &d, const std::string &kernel_name, const array &in, const array &idx, int bn, int tn)
MTL::ComputePipelineState * get_unary_kernel(metal::Device &d, const std::string &kernel_name, Dtype in_type, Dtype out_type, const std::string op)