Enabling fused attention for head dim 128 (#1899)

* Share KV smem

* Fix bfloat error

* Unroll O = S @ V loop

* Perf upgrade

* Remove commented out function

* Add the `-Wno-c++17-extensions` flag to the Metal compiler flags

* Add the `-Wno-c++17-extensions` flag to the Metal extension flags
This commit is contained in:
Jagrit Digani
2025-02-26 10:02:06 -08:00
committed by GitHub
parent 6bf00ef631
commit 89d327075f
5 changed files with 102 additions and 46 deletions

View File

@@ -9,7 +9,7 @@ set(BASE_HEADERS
utils.h)
function(build_kernel_base TARGET SRCFILE DEPS)
-set(METAL_FLAGS -Wall -Wextra -fno-fast-math)
+set(METAL_FLAGS -Wall -Wextra -fno-fast-math -Wno-c++17-extensions)
if(MLX_METAL_DEBUG)
set(METAL_FLAGS ${METAL_FLAGS} -gline-tables-only -frecord-sources)
endif()