Use async CUDA managed malloc with CUDA 13

This commit is contained in:
Awni Hannun
2025-10-26 16:17:27 -07:00
parent 74c1ed25bb
commit 764b4b7ce8
19 changed files with 110 additions and 54 deletions

View File

@@ -41,9 +41,8 @@ void Arange::eval_gpu(const std::vector<array>& inputs, array& out) {
if (out.size() == 0) {
return;
}
out.set_data(allocator::malloc(out.nbytes()));
auto& encoder = cu::get_command_encoder(stream());
out.set_data(cu::malloc_async(out.nbytes(), encoder.stream()));
encoder.set_output_array(out);
dispatch_int_float_types(out.dtype(), "Arange", [&](auto type_tag) {