From d197c185280b5839bb13379cd45e664f3f8263cf Mon Sep 17 00:00:00 2001 From: Jack Date: Tue, 13 May 2025 21:45:30 -0400 Subject: [PATCH] Add set_threadgroup_memory_length to CommandEncoder This method exposes the Metal API's setThreadgroupMemoryLength functionality, which is needed when implementing custom kernels that require configuring threadgroup memory size. This allows for better performance tuning in specialized Metal compute operations that rely on shared threadgroup memory. --- mlx/backend/metal/device.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlx/backend/metal/device.h b/mlx/backend/metal/device.h index 26c9a0a28..566b5f36b 100644 --- a/mlx/backend/metal/device.h +++ b/mlx/backend/metal/device.h @@ -95,6 +95,10 @@ struct CommandEncoder { return enc_->setBytes(&v, sizeof(T), idx); } + /* Sets the threadgroup memory length on the wrapped encoder by forwarding length and index unchanged to enc_->setThreadgroupMemoryLength(). No validation is done in this wrapper. NOTE(review): the units and alignment rules for length, and the valid range of index, are defined by Metal rather than by this code - confirm against the Metal documentation. */ void set_threadgroup_memory_length(size_t length, NS::UInteger index) { + enc_->setThreadgroupMemoryLength(length, index); + } + ConcurrentContext start_concurrent() { return ConcurrentContext(*this); }