mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Some checks failed
Build and Test / Check Lint (push) Has been cancelled
Build and Test / Linux (cpu, aarch64) (push) Has been cancelled
Build and Test / Linux (cpu, x86_64) (push) Has been cancelled
Build and Test / Linux (cuda-12.6, aarch64) (push) Has been cancelled
Build and Test / Linux (cuda-12.9, aarch64) (push) Has been cancelled
Build and Test / Linux (cuda-12.6, x86_64) (push) Has been cancelled
Build and Test / Linux (cuda-12.9, x86_64) (push) Has been cancelled
Build and Test / macOS (14.0) (push) Has been cancelled
Build and Test / macOS (15.0) (push) Has been cancelled
Build and Test / Build Documentation (push) Has been cancelled
Build and Test / Linux Fedora (aarch64) (push) Has been cancelled
Build and Test / Linux Fedora (x86_64) (push) Has been cancelled
Nightly Build / build_linux_release (3.10) (push) Has been cancelled
Nightly Build / build_linux_release (3.14) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.11, ubuntu-22.04) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.11, ubuntu-22.04-arm) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.12, ubuntu-22.04) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.12, ubuntu-22.04-arm) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.13, ubuntu-22.04) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.13, ubuntu-22.04-arm) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.14, ubuntu-22.04) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.14, ubuntu-22.04-arm) (push) Has been cancelled
Nightly Build / build_mac_release (3.10) (push) Has been cancelled
Nightly Build / build_mac_release (3.13) (push) Has been cancelled
Nightly Build / build_cuda_release (push) Has been cancelled
47 lines
1.2 KiB
C++
47 lines
1.2 KiB
C++
// Copyright © 2025 Apple Inc.
|
|
|
|
// This file includes utilities that are used by C++ code (i.e. .cpp files).
|
|
|
|
#pragma once
|
|
|
|
#include "mlx/array.h"
|
|
#include "mlx/backend/cuda/allocator.h"
|
|
#include "mlx/backend/cuda/cuda_utils.h"
|
|
|
|
namespace mlx::core {
|
|
|
|
template <typename T>
|
|
inline uint max_occupancy_block_dim(T kernel) {
|
|
int _, block_dim;
|
|
if constexpr (std::is_same_v<T, CUfunction>) {
|
|
CHECK_CUDA_ERROR(
|
|
cuOccupancyMaxPotentialBlockSize(&_, &block_dim, kernel, 0, 0, 0));
|
|
} else {
|
|
CHECK_CUDA_ERROR(
|
|
cudaOccupancyMaxPotentialBlockSize(&_, &block_dim, kernel));
|
|
}
|
|
return block_dim;
|
|
}
|
|
|
|
template <typename T>
|
|
inline T* gpu_ptr(array& arr) {
|
|
return reinterpret_cast<T*>(
|
|
static_cast<char*>(
|
|
static_cast<cu::CudaBuffer*>(arr.buffer().ptr())->data) +
|
|
arr.offset());
|
|
}
|
|
|
|
// For const array, keep constness in pointer unless it is untyped.
|
|
template <typename T>
|
|
inline std::conditional_t<std::is_same_v<T, void>, void*, const T*> gpu_ptr(
|
|
const array& arr) {
|
|
return gpu_ptr<T>(const_cast<array&>(arr));
|
|
}
|
|
|
|
// Forward declaration: the prototype below only takes a Dtype by const
// reference, so the complete type is not required at this point.
struct Dtype;
|
|
|
|
// Convert Dtype to CUDA C++ types.
|
|
// Declaration only; no definition is visible in this header.
// NOTE(review): the result is a raw `const char*` — presumably a type name
// with static storage that callers must not free; confirm at the definition.
const char* dtype_to_cuda_type(const Dtype& dtype);
|
|
|
|
} // namespace mlx::core
|