* Link with cuDNN
* Initial implementation
* Remove backend apis
* Fix recording cudnn conv
* More unused backend apis
* Fix C++ conv tests
* include cudnn as python dep
* Install libcudnn9-dev-cuda-12 in CI
* cudnn only accepts contiguous inputs
* Switch to backend apis
* Plan needs to be kept alive
* Turn off tf32
* Add cache
* Test the native cuda graph api
* Set cudnn stream before execution
* Make LRUCache more like a normal container
* Do error check for cublas handle
* Zero-initializing array
* Use tf32 for conv
* Skip TestConv.test_torch_conv_2D test

---------

Co-authored-by: Awni Hannun <awni@apple.com>
49 lines
1.1 KiB
C++
// Copyright © 2025 Apple Inc.

// This file includes utilities that are used by C++ code (i.e. .cpp files).

#pragma once

#include <cublasLt.h>
#include <cuda.h>
#include <cuda_runtime.h>

namespace mlx::core {

namespace cu {
class Device;
}  // namespace cu

struct Dtype;

// CUDA stream managed with RAII.
class CudaStream {
 public:
  explicit CudaStream(cu::Device& device);
  ~CudaStream();

  CudaStream(const CudaStream&) = delete;
  CudaStream& operator=(const CudaStream&) = delete;

  operator cudaStream_t() const {
    return stream_;
  }

 private:
  cudaStream_t stream_;
};
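// A minimal usage sketch (not part of this header): assumes a cu::Device
// instance named `device` and a device pointer `ptr` of `nbytes` bytes.
//
//   CudaStream stream(device);
//   cudaMemsetAsync(ptr, 0, nbytes, stream);  // implicit conversion to cudaStream_t
//   cudaStreamSynchronize(stream);
//   // The underlying stream is released when `stream` goes out of scope.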
// Throw an exception if the CUDA API call does not succeed.
void check_cublas_error(const char* name, cublasStatus_t err);
void check_cuda_error(const char* name, cudaError_t err);
void check_cuda_error(const char* name, CUresult err);

// Macro versions that pass the text of the failing command as `name`.
#define CHECK_CUBLAS_ERROR(cmd) check_cublas_error(#cmd, (cmd))
#define CHECK_CUDA_ERROR(cmd) check_cuda_error(#cmd, (cmd))
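// A minimal usage sketch (hypothetical; `ptr` and `nbytes` are placeholders):
// the stringified call is forwarded so the thrown error can name the command
// that failed.
//
//   void* ptr = nullptr;
//   CHECK_CUDA_ERROR(cudaMalloc(&ptr, nbytes));
//   cublasLtHandle_t lt_handle;
//   CHECK_CUBLAS_ERROR(cublasLtCreate(&lt_handle));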
// Convert a Dtype to the name of the corresponding CUDA C++ type.
const char* dtype_to_cuda_type(const Dtype& dtype);
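// A usage sketch (assumes a Dtype value `dt`; the returned name is presumably
// spliced into CUDA source generated at runtime):
//
//   std::string decl = std::string(dtype_to_cuda_type(dt)) + " x;";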
} // namespace mlx::core