[CUDA] Simplify allocator (#2392)

* simplify allocator and fix race with small pool * Don't use shared event in worker * use cuda buffer in small pool * comment * comment
2025-12-16 01:49:05 +08:00 · 2025-07-22 08:24:01 -07:00
parent 74eccbf3fa
commit 1e496ddb82
9 changed files with 100 additions and 162 deletions
--- a/mlx/backend/cuda/eval.cpp
+++ b/mlx/backend/cuda/eval.cpp
@@ -19,8 +19,6 @@ void new_stream(Stream s) {
  cudaFree(nullptr);
  // Ensure the static stream objects get created.
  cu::get_command_encoder(s);
-  // The main thread is safe to free buffers.
-  cu::allocator().register_this_thread();
 }

 void eval(array& arr) {