Add memory cache to CUDA backend (#2221)

* Move BufferCache out of allocator
* Add memory cache to CUDA backend allocator
* Simplify BufferCache, assuming buf cannot be null
2025-12-16 01:49:05 +08:00 · 2025-05-31 04:12:54 +09:00
parent 6ef2f67e7f
commit db5a7c6192
5 changed files with 259 additions and 203 deletions
--- a/mlx/backend/cuda/allocator.h
+++ b/mlx/backend/cuda/allocator.h
@@ -3,6 +3,7 @@
 #pragma once

 #include "mlx/allocator.h"
+#include "mlx/backend/common/buffer_cache.h"

 #include <mutex>
 #include <set>
@@ -38,17 +39,24 @@ class CudaAllocator : public allocator::Allocator {
  void reset_peak_memory();
  size_t get_memory_limit();
  size_t set_memory_limit(size_t limit);
+  size_t get_cache_memory() const;
+  size_t set_cache_limit(size_t limit);
+  void clear_cache();

 private:
  CudaAllocator();
  friend CudaAllocator& allocator();

+  void cuda_free(CudaBuffer* buf);
+
  std::mutex worker_mutex_;
  std::unique_ptr<Worker> worker_;
  std::set<std::thread::id> allowed_threads_;

  std::mutex mutex_;
  size_t memory_limit_;
+  size_t max_pool_size_;
+  BufferCache<CudaBuffer> buffer_cache_;
  size_t active_memory_{0};
  size_t peak_memory_{0};
 };