// Copyright © 2025 Apple Inc.

#pragma once

#include "mlx/allocator.h"
#include "mlx/backend/common/buffer_cache.h"
#include "mlx/backend/cuda/cuda_utils.h"

#include <cuda_runtime.h>

#include <mutex>
#include <set>
#include <utility>
#include <vector>

namespace mlx::core::cu {
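
// Forward declaration of the CUDA command encoder (defined elsewhere in the
// CUDA backend); used only by the malloc_async helper declared below.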
class CommandEncoder;

using allocator::Buffer;

// Stores a CUDA allocation (device memory, or CUDA managed/unified memory
// when `device` is -1).
struct CudaBuffer {
  void* data;
  size_t size;
  int device; // -1 for managed
};
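
// Free-list pool of small fixed-size blocks used for tiny (scalar-sized)
// allocations, so they bypass the regular CUDA allocation path. (Descriptive
// comment inferred from the members and from scalar_pool_ below.)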
class SmallSizePool {
 private:
  union Block {
    Block* next;
    CudaBuffer buf;
  };

  Block* buffer_{nullptr};
  void* data_{nullptr};
  Block* next_free_{nullptr};

 public:
  SmallSizePool();
  ~SmallSizePool();

  SmallSizePool(const SmallSizePool&) = delete;
  SmallSizePool& operator=(const SmallSizePool&) = delete;

  CudaBuffer* malloc();
  void free(CudaBuffer* buf);
  bool in_pool(CudaBuffer* buf);
};
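
// CUDA backend allocator: tracks active and peak memory, caches freed
// buffers for reuse, and exposes both synchronous and stream-ordered (async)
// allocation. (Summary inferred from the declarations below.)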
class CudaAllocator : public allocator::Allocator {
 public:
  Buffer malloc(size_t size) override;
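  // Stream-ordered allocation for `device` on `stream` (cudaMallocAsync
  // style); description inferred from the signature.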
  Buffer malloc_async(size_t size, int device, cudaStream_t stream);
  void free(Buffer buffer) override;
  size_t size(Buffer buffer) const override;
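
  // Memory accounting and cache controls.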
  size_t get_active_memory() const;
  size_t get_peak_memory() const;
  void reset_peak_memory();
  size_t get_memory_limit();
  size_t set_memory_limit(size_t limit);
  size_t get_cache_memory() const;
  size_t set_cache_limit(size_t limit);
  void clear_cache();

 private:
  void cuda_free(CudaBuffer* buf);

  CudaAllocator();
  friend CudaAllocator& allocator();

  std::mutex mutex_;
  size_t memory_limit_;
  size_t free_limit_;
  size_t total_memory_;
  size_t max_pool_size_;
  BufferCache<CudaBuffer> buffer_cache_;
  size_t active_memory_{0};
  size_t peak_memory_{0};
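  // Streams and memory pools used by the stream-ordered (async) allocation
  // path, presumably one per device (inferred from the member types).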
  std::vector<cudaStream_t> free_streams_;
  std::vector<cudaMemPool_t> mem_pools_;
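  // Dedicated pool for very small (scalar-sized) allocations.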
  SmallSizePool scalar_pool_;
};
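
// Returns the global CudaAllocator instance (singleton; note the private
// constructor and friend declaration above).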
CudaAllocator& allocator();
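
// Stream-ordered allocation on the encoder's stream; presumably forwards to
// CudaAllocator::malloc_async with the encoder's device and stream.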
Buffer malloc_async(size_t size, CommandEncoder& encoder);
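
// Example (sketch, not part of the API): a synchronous allocate/free round
// trip through the allocator singleton.
//
//   auto& a = cu::allocator();
//   allocator::Buffer buf = a.malloc(256);
//   // ... use the buffer ...
//   a.free(buf);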

} // namespace mlx::core::cu