Files
mlx/mlx/backend/cuda/unary/sqrt.cu
Awni Hannun 6441c21a94 Faster general unary op (#2472)
* faster general unary op

* faster general ops + reorg

* fix + comment

* binary two

* copy general
2025-08-15 15:04:12 -07:00

16 lines
413 B
Plaintext

// Copyright © 2025 Apple Inc.
#include "mlx/backend/cuda/unary/unary.cuh"
namespace mlx::core {
void Sqrt::eval_gpu(const std::vector<array>& inputs, array& out) {
nvtx3::scoped_range r("Sqrt::eval_gpu");
auto& s = out.primitive().stream();
if (recip_) {
unary_op_gpu<cu::Rsqrt>(inputs, out, "Rsqrt", s);
} else {
unary_op_gpu<cu::Sqrt>(inputs, out, "Sqrt", s);
}
}
} // namespace mlx::core