// Copyright © 2025 Apple Inc.

#pragma once

#include "mlx/allocator.h"
#include "mlx/backend/common/buffer_cache.h"
#include "mlx/backend/cuda/cuda_utils.h"

#include <cuda_runtime.h>

#include <mutex>
#include <set>
#include <utility>
#include <vector>

namespace mlx::core::cu {
class CommandEncoder;

using allocator::Buffer;

// Holds a single CUDA allocation. `device` is the CUDA device the memory
// lives on, or -1 when the buffer is CUDA managed (unified) memory.
struct CudaBuffer {
  void* data;
  size_t size;
  int device; // -1 for managed
};
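
// Fixed-size pool of CudaBuffer blocks kept on an intrusive free list (a free
// Block reuses its storage as the `next` pointer). Presumably used so that
// tiny scalar allocations avoid the main cache path; see `scalar_pool_` below.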
class SmallSizePool {
 private:
  union Block {
    Block* next;
    CudaBuffer buf;
  };

  Block* buffer_{nullptr};
  void* data_{nullptr};
  Block* next_free_{nullptr};

 public:
  SmallSizePool();
  ~SmallSizePool();

  SmallSizePool(const SmallSizePool&) = delete;
  SmallSizePool& operator=(const SmallSizePool&) = delete;

  CudaBuffer* malloc();
  void free(CudaBuffer* buf);
  bool in_pool(CudaBuffer* buf);
};
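
// Allocator for CUDA memory that caches freed buffers: releases go into
// `buffer_cache_` (up to the cache limit) so later requests can be served
// without a new CUDA allocation. Tracks active and peak memory, enforces a
// configurable memory limit, and scalar-sized requests appear to come from
// `scalar_pool_`.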
class CudaAllocator : public allocator::Allocator {
 public:
  Buffer malloc(size_t size) override;
  // Asynchronous allocation on a specific device and stream.
  Buffer malloc_async(size_t size, int device, cudaStream_t stream);
  void free(Buffer buffer) override;
  size_t size(Buffer buffer) const override;

  // Memory statistics and limits.
  size_t get_active_memory() const;
  size_t get_peak_memory() const;
  void reset_peak_memory();
  size_t get_memory_limit();
  size_t set_memory_limit(size_t limit);
  size_t get_cache_memory() const;
  size_t set_cache_limit(size_t limit);
  void clear_cache();

 private:
  void cuda_free(CudaBuffer* buf);

  CudaAllocator();
  friend CudaAllocator& allocator();

  std::mutex mutex_;
  size_t memory_limit_;
  size_t free_limit_;
  size_t total_memory_;
  size_t max_pool_size_;
  BufferCache<CudaBuffer> buffer_cache_;
  size_t active_memory_{0};
  size_t peak_memory_{0};
  std::vector<cudaStream_t> free_streams_;
  std::vector<cudaMemPool_t> mem_pools_;
  SmallSizePool scalar_pool_;
};
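
// Returns the global CudaAllocator; its constructor is private, so this
// accessor is the only place it gets created.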
CudaAllocator& allocator();
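
// Convenience wrapper that presumably forwards to CudaAllocator::malloc_async
// using the device and stream associated with `encoder`.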
Buffer malloc_async(size_t size, CommandEncoder& encoder);
} // namespace mlx::core::cu