[CUDA] Simplify allocator (#2392)

* simplify allocator and fix race with small pool

* Don't use shared event in worker

* use cuda buffer in small pool

* comment

* comment
This commit is contained in:
Awni Hannun
2025-07-22 08:24:01 -07:00
committed by GitHub
parent 74eccbf3fa
commit 1e496ddb82
9 changed files with 100 additions and 162 deletions

View File

@@ -19,8 +19,6 @@ void new_stream(Stream s) {
cudaFree(nullptr);
// Ensure the static stream objects get created.
cu::get_command_encoder(s);
// The main thread is safe to free buffers.
cu::allocator().register_this_thread();
}
void eval(array& arr) {