mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
wait for tasks in cuda (#2636)
This commit is contained in:
@@ -5,11 +5,15 @@
|
||||
#include "mlx/backend/cuda/device.h"
|
||||
#include "mlx/backend/gpu/available.h"
|
||||
#include "mlx/primitives.h"
|
||||
#include "mlx/scheduler.h"
|
||||
|
||||
#include <nvtx3/nvtx3.hpp>
|
||||
|
||||
namespace mlx::core::gpu {
|
||||
|
||||
// Can be tuned with MLX_MAX_OPS_PER_BUFFER
|
||||
constexpr int default_max_nodes_per_graph = 20;
|
||||
|
||||
bool is_available() {
|
||||
return true;
|
||||
}
|
||||
@@ -36,7 +40,8 @@ void eval(array& arr) {
|
||||
arr.primitive().eval_gpu(arr.inputs(), outputs);
|
||||
}
|
||||
|
||||
auto& encoder = cu::get_command_encoder(arr.primitive().stream());
|
||||
auto& stream = arr.primitive().stream();
|
||||
auto& encoder = cu::get_command_encoder(stream);
|
||||
// Keep used buffers alive until kernel finishes running.
|
||||
for (auto& in : arr.inputs()) {
|
||||
// Except for the donated one.
|
||||
@@ -47,7 +52,14 @@ void eval(array& arr) {
|
||||
for (auto& s : arr.siblings()) {
|
||||
encoder.add_temporary(s);
|
||||
}
|
||||
encoder.maybe_commit();
|
||||
|
||||
if (encoder.get_num_ops() >=
|
||||
env::max_ops_per_buffer(default_max_nodes_per_graph)) {
|
||||
scheduler::notify_new_task(stream);
|
||||
encoder.add_completed_handler(
|
||||
[stream]() { scheduler::notify_task_completion(stream); });
|
||||
encoder.commit();
|
||||
}
|
||||
}
|
||||
|
||||
void finalize(Stream s) {
|
||||
|
||||
Reference in New Issue
Block a user