From a4fcc893cd4caad05c97ed038e083b9c8395580c Mon Sep 17 00:00:00 2001 From: Awni Hannun Date: Mon, 7 Jul 2025 09:29:23 -0700 Subject: [PATCH] auto build linux release (#2341) --- .circleci/config.yml | 10 ++++++++++ python/src/fast.cpp | 36 +++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 293cdce79..be5f7aac5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -492,6 +492,16 @@ workflows: branches: ignore: /.*/ upload-docs: true + - build_linux_release: + filters: + tags: + only: /^v.*/ + branches: + ignore: /.*/ + matrix: + parameters: + python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + extra_env: ["PYPI_RELEASE=1"] prb: when: diff --git a/python/src/fast.cpp b/python/src/fast.cpp index c94f99e1a..8adba2a25 100644 --- a/python/src/fast.cpp +++ b/python/src/fast.cpp @@ -175,11 +175,12 @@ void init_fast(nb::module_& parent_module) { * `Grouped Query Attention <https://arxiv.org/abs/2305.13245>`_ * `Multi-Query Attention <https://arxiv.org/abs/1911.02150>`_ - Note: The softmax operation is performed in ``float32`` regardless of - the input precision. + .. note:: - Note: For Grouped Query Attention and Multi-Query Attention, the ``k`` - and ``v`` inputs should not be pre-tiled to match ``q``. + * The softmax operation is performed in ``float32`` regardless of + the input precision. + * For Grouped Query Attention and Multi-Query Attention, the ``k`` + and ``v`` inputs should not be pre-tiled to match ``q``. In the following the dimensions are given by: @@ -195,13 +196,30 @@ void init_fast(nb::module_& parent_module) { k (array): Keys with shape ``[B, N_kv, T_kv, D]``. v (array): Values with shape ``[B, N_kv, T_kv, D]``. scale (float): Scale for queries (typically ``1.0 / sqrt(q.shape(-1))``) - mask (Union[None, str, array], optional): A causal, boolean or additive - mask to apply to the query-key scores. The mask can have at most 4 - dimensions and must be broadcast-compatible with the shape - ``[B, N, T_q, T_kv]``. 
If an additive mask is given its type must - promote to the promoted type of ``q``, ``k``, and ``v``. + mask (Union[None, str, array], optional): The mask to apply to the + query-key scores. The mask can be an array or a string indicating + the mask type. The only supported string type is ``"causal"``. If + the mask is an array it can be a boolean or additive mask. The mask + can have at most 4 dimensions and must be broadcast-compatible with + the shape ``[B, N, T_q, T_kv]``. If an additive mask is given its + type must promote to the promoted type of ``q``, ``k``, and ``v``. Returns: array: The output array. + + Example: + + .. code-block:: python + + B = 2 + N_q = N_kv = 32 + T_q = T_kv = 1000 + D = 128 + + q = mx.random.normal(shape=(B, N_q, T_q, D)) + k = mx.random.normal(shape=(B, N_kv, T_kv, D)) + v = mx.random.normal(shape=(B, N_kv, T_kv, D)) + scale = D ** -0.5 + out = mx.fast.scaled_dot_product_attention(q, k, v, scale=scale, mask="causal") )pbdoc"); m.def(