From c82a8cc5265370e87b1a19355101bdece6fdc844 Mon Sep 17 00:00:00 2001 From: davidkoski <46639364+davidkoski@users.noreply.github.com> Date: Thu, 4 Jan 2024 16:12:00 -0800 Subject: [PATCH] move all ObjC (via metal-cpp) interaction until post static initializers (#370) * move all ObjC (via metal-cpp) interaction until post static initializers - metal-cpp relies on static initializers to cache class and selector pointers - code in mlx was using metal-cpp to set up NSAutoreleasePools during its own static init time - but this code was silently failing as the class and selector pointers from metal-cpp were still nil - defer the creation of NSAutoreleasePools until after static init time - ensure that we have coverage where autorelease pools are needed * Update device.cpp remove commented code * Update device.cpp remove commented out code * Update scheduler.h update comment * per discussion use the pool inside the task() -- this will be metal only, not needed for cpu * Update allocator.cpp move pool to release/alloc area --- mlx/backend/metal/allocator.cpp | 3 +++ mlx/backend/metal/device.cpp | 9 +++++---- mlx/scheduler.h | 13 +++++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/mlx/backend/metal/allocator.cpp b/mlx/backend/metal/allocator.cpp index 07f502998..3debc4742 100644 --- a/mlx/backend/metal/allocator.cpp +++ b/mlx/backend/metal/allocator.cpp @@ -29,6 +29,7 @@ BufferCache::BufferCache(MTL::Device* device) : device_(device), head_(nullptr), tail_(nullptr), pool_size_(0) {} BufferCache::~BufferCache() { + auto thread_pool = metal::new_scoped_memory_pool(); clear(); } @@ -166,6 +167,8 @@ Buffer MetalAllocator::malloc(size_t size, bool allow_swap /* = false */) { return Buffer{nullptr}; } + auto thread_pool = metal::new_scoped_memory_pool(); + // If we have a lot of memory pressure, check if we can reclaim some memory // from the cache if (device_->currentAllocatedSize() + size >= gc_limit_) { diff --git a/mlx/backend/metal/device.cpp b/mlx/backend/metal/device.cpp index c48f2908f..6d7528f8a 100644 --- a/mlx/backend/metal/device.cpp +++ b/mlx/backend/metal/device.cpp @@ -19,16 +19,14 @@ namespace mlx::core::metal { namespace { -// Catch things related to the main-thread static variables -static std::shared_ptr global_memory_pool = new_scoped_memory_pool(); - // TODO nicer way to set this or possibly expose as an environment variable static constexpr int MAX_BUFFERS_PER_QUEUE = 12; static constexpr const char* default_mtllib_path = METAL_PATH; auto load_device() { - MTL::Device* device = MTL::CreateSystemDefaultDevice(); + auto devices = MTL::CopyAllDevices(); + auto device = static_cast(devices->object(0)); if (!device) { throw std::runtime_error("Failed to load device"); } @@ -120,6 +118,7 @@ Device::Device() { } Device::~Device() { + auto pool = new_scoped_memory_pool(); for (auto& q : queue_map_) { q.second->release(); } @@ -139,6 +138,8 @@ Device::~Device() { } void Device::new_queue(int index) { + auto thread_pool = metal::new_scoped_memory_pool(); + // Multiple threads can ask the device for queues // We lock this as a critical section for safety const std::lock_guard lock(mtx_); diff --git a/mlx/scheduler.h b/mlx/scheduler.h index 150cc96db..755efeaf1 100644 --- a/mlx/scheduler.h +++ b/mlx/scheduler.h @@ -35,8 +35,7 @@ struct StreamThread { } void thread_fn() { - auto thread_pool = metal::new_scoped_memory_pool(); - metal::new_stream(stream); + bool initialized = false; while (true) { std::function task; { @@ -48,6 +47,16 @@ struct StreamThread { task = std::move(q.front()); q.pop(); } + + // thread_fn may be called from a static initializer and we cannot + // call metal-cpp until all static initializers have completed. waiting + // for a task to arrive means that user code is running so metal-cpp + // can safely be called. + if (!initialized) { + initialized = true; + metal::new_stream(stream); + } + task(); } }