mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
* faster general unary op * faster general ops + reorg * fix + comment * binary two * copy general
16 lines
413 B
Plaintext
16 lines
413 B
Plaintext
// Copyright © 2025 Apple Inc.
|
|
|
|
#include "mlx/backend/cuda/unary/unary.cuh"
|
|
|
|
namespace mlx::core {
|
|
void Sqrt::eval_gpu(const std::vector<array>& inputs, array& out) {
|
|
nvtx3::scoped_range r("Sqrt::eval_gpu");
|
|
auto& s = out.primitive().stream();
|
|
if (recip_) {
|
|
unary_op_gpu<cu::Rsqrt>(inputs, out, "Rsqrt", s);
|
|
} else {
|
|
unary_op_gpu<cu::Sqrt>(inputs, out, "Sqrt", s);
|
|
}
|
|
}
|
|
} // namespace mlx::core
|