// Mirror of https://github.com/ml-explore/mlx.git
// (synced 2025-07-23 18:11:17 +08:00; 20 lines, 510 B, plaintext).
// Copyright © 2025 Apple Inc.
|
|
|
|
#include <hip/hip_runtime.h>
|
|
|
|
namespace mlx::core::rocm {
|
|
|
|
__global__ void copy_kernel(float* src, float* dst, int n) {
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (idx < n) {
|
|
dst[idx] = src[idx];
|
|
}
|
|
}
|
|
|
|
void launch_copy(float* src, float* dst, int n, hipStream_t stream) {
|
|
int threads = 256;
|
|
int blocks = (n + threads - 1) / threads;
|
|
hipLaunchKernelGGL(copy_kernel, dim3(blocks), dim3(threads), 0, stream, src, dst, n);
|
|
}
|
|
|
|
} // namespace mlx::core::rocm |