Fused attention for single query (#1497)

Author: Angelos Katharopoulos
Date: 2024-10-18 00:58:52 -07:00
Committed by: GitHub
Parent: 9dd72cd421
Commit: 50d8bed468
6 changed files with 299 additions and 742 deletions
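The commit title points at the decode-time case: a single query token attending over a cached sequence of keys and values. The sketch below is a plain MLX reference of that computation, not the new Metal kernel; the shapes, tolerance, and the claim that mx.fast.scaled_dot_product_attention dispatches to a fused path for this case are illustrative assumptions.

# Reference sketch of single-query attention (autoregressive decoding step).
# Shapes are assumptions for illustration, not taken from the commit.
import mlx.core as mx

B, n_heads, L, D = 1, 8, 512, 64           # batch, heads, cached length, head dim
q = mx.random.normal((B, n_heads, 1, D))   # single query token
k = mx.random.normal((B, n_heads, L, D))   # cached keys
v = mx.random.normal((B, n_heads, L, D))   # cached values
scale = D ** -0.5

# Unfused reference: scores -> softmax -> weighted sum of values.
scores = (q * scale) @ k.transpose(0, 1, 3, 2)   # (B, n_heads, 1, L)
out_ref = mx.softmax(scores, axis=-1) @ v        # (B, n_heads, 1, D)

# The fast op may dispatch to a fused kernel for cases like this single-query one.
out_fast = mx.fast.scaled_dot_product_attention(q, k, v, scale=scale)

print(mx.allclose(out_ref, out_fast, atol=1e-4))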


@@ -30,8 +30,9 @@ build_kernel(layer_norm)
 build_kernel(random)
 build_kernel(rms_norm)
 build_kernel(rope)
-build_kernel(scaled_dot_product_attention scaled_dot_product_attention_params.h
-             steel/defines.h steel/gemm/transforms.h steel/utils.h)
+build_kernel(
+  scaled_dot_product_attention scaled_dot_product_attention_params.h
+  sdpa_vector.h steel/defines.h steel/gemm/transforms.h steel/utils.h)
 set(STEEL_HEADERS
     steel/defines.h