mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
* Organize and collect metal subroutine templates and elements in `metal/kernels/steel/` * Update gemm elements for better performance * Add split-K specialization for gemm * Add `addmm` primitive, op and bindings for fused matmul and bias addition * Update tests and benchmarks as needed
9 lines
236 B
C
9 lines
236 B
C
// Copyright © 2024 Apple Inc.
|
|
|
|
#pragma once
|
|
|
|
#include <metal_stdlib>
|
|
#include "mlx/backend/metal/kernels/steel/host.h"
|
|
|
|
// Shorthand for the declaration-specifier sequence
// `static constant constexpr const`, so constant declarations can be written
// with a single token.
// NOTE(review): `constant` is presumably the Metal address-space qualifier
// (this header includes <metal_stdlib>) -- confirm against the usages.
#define STEEL_CONST static constant constexpr const
|
|
// Expands to `_Pragma("clang loop unroll(full)")`, the operator form of
// `#pragma clang loop unroll(full)`. Place it immediately before a loop
// statement to ask the compiler to fully unroll that loop.
// The replacement list must contain nothing but the _Pragma operator: any
// trailing token would be pasted in front of the loop at every use site.
#define STEEL_PRAGMA_UNROLL _Pragma("clang loop unroll(full)")