mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
[CUDA] Save primitive inputs faster (#2449)
* Add more nvtx loggings
* [CUDA] Saving primitive inputs faster
* Remove unneeded check
This commit is contained in:
@@ -192,7 +192,7 @@ void ternary_op_gpu(
|
||||
}
|
||||
|
||||
void Select::eval_gpu(const std::vector<array>& inputs, array& out) {
|
||||
nvtx3::scoped_range r("select::eval_gpu");
|
||||
nvtx3::scoped_range r("Select::eval_gpu");
|
||||
auto& s = out.primitive().stream();
|
||||
ternary_op_gpu<cu::Select>(inputs, out, s);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user