mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 12:49:50 +08:00
Make attention faster for some models (#574)
* Make attention faster for a couple of models
* Remove unused generation flags
* Add comment on LoRA
* Include text files as well
This commit is contained in:
@@ -68,7 +68,6 @@ class RoPEAttention(nn.Module):
|
||||
keys = self.rope(keys)
|
||||
|
||||
queries = queries.astype(mx.float32)
|
||||
keys = keys.astype(mx.float32)
|
||||
|
||||
# Finally perform the attention computation
|
||||
scale = math.sqrt(1 / queries.shape[-1])
|
||||
|
Reference in New Issue
Block a user