mirror of
https://github.com/ml-explore/mlx.git
synced 2025-08-20 18:39:45 +08:00
tune / fix allocation
This commit is contained in:
parent
5ff2a6c762
commit
97ea5b23cb
@ -52,10 +52,14 @@ MTL::Buffer* BufferCache::reuse_from_cache(size_t size) {
|
||||
MTL::Buffer* pbuf = nullptr;
|
||||
|
||||
// Make sure we use most of the available memory
|
||||
if (auto it = buffer_pool_.lower_bound(size); it != buffer_pool_.end() &&
|
||||
it->first < std::min(2 * size, size + vm_page_size)) {
|
||||
auto it = buffer_pool_.lower_bound(size);
|
||||
|
||||
// Make sure we use most of the available memory
|
||||
while (!pbuf && it != buffer_pool_.end() &&
|
||||
it->first < std::min(2 * size, size + 2 * vm_page_size)) {
|
||||
// Collect from the cache
|
||||
pbuf = it->second->buf;
|
||||
|
||||
// Remove from cache
|
||||
remove_from_list(it->second);
|
||||
delete it->second;
|
||||
@ -81,6 +85,25 @@ void BufferCache::recycle_to_cache(MTL::Buffer* buf) {
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCache::release_cached_buffers(size_t min_bytes_to_free) {
|
||||
if (min_bytes_to_free >= 0.9 * pool_size_) {
|
||||
clear();
|
||||
} else {
|
||||
std::lock_guard<std::mutex> lk(cache_mutex_);
|
||||
size_t total_bytes_freed = 0;
|
||||
|
||||
while (tail_ && (total_bytes_freed < min_bytes_to_free)) {
|
||||
if (tail_->buf) {
|
||||
total_bytes_freed += tail_->buf->length();
|
||||
tail_->buf->release();
|
||||
tail_->buf = nullptr;
|
||||
}
|
||||
remove_from_list(tail_);
|
||||
}
|
||||
pool_size_ -= total_bytes_freed;
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCache::add_at_head(BufferCache::BufferHolder* to_add) {
|
||||
if (!to_add)
|
||||
return;
|
||||
@ -96,8 +119,9 @@ void BufferCache::add_at_head(BufferCache::BufferHolder* to_add) {
|
||||
}
|
||||
|
||||
void BufferCache::remove_from_list(BufferCache::BufferHolder* to_remove) {
|
||||
if (!to_remove)
|
||||
if (!to_remove) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If in the middle
|
||||
if (to_remove->prev && to_remove->next) {
|
||||
@ -124,7 +148,8 @@ MetalAllocator::MetalAllocator()
|
||||
: device_(device(mlx::core::Device::gpu).mtl_device()),
|
||||
buffer_cache_(device_),
|
||||
peak_allocated_size_(0),
|
||||
block_limit_(device_->recommendedMaxWorkingSetSize()) {}
|
||||
block_limit_(device_->recommendedMaxWorkingSetSize()),
|
||||
gc_limit_(0.95 * device_->recommendedMaxWorkingSetSize()) {}
|
||||
|
||||
Buffer MetalAllocator::malloc(size_t size, bool allow_swap /* = false */) {
|
||||
// Align up memory
|
||||
@ -136,17 +161,19 @@ Buffer MetalAllocator::malloc(size_t size, bool allow_swap /* = false */) {
|
||||
MTL::Buffer* buf = buffer_cache_.reuse_from_cache(size);
|
||||
|
||||
if (!buf) {
|
||||
// If we have memory pressure, first check if we can reclaim some memory
|
||||
// from the cache
|
||||
if (device_->currentAllocatedSize() + size >= block_limit_) {
|
||||
buffer_cache_.clear();
|
||||
}
|
||||
|
||||
// If there is still too much memory pressure, fail (likely causes a wait).
|
||||
// If there is too much memory pressure, fail (likely causes a wait).
|
||||
if (!allow_swap && device_->currentAllocatedSize() + size >= block_limit_) {
|
||||
return Buffer{nullptr};
|
||||
}
|
||||
|
||||
// If we have a lot of memory pressure, check if we can reclaim some memory
|
||||
// from the cache
|
||||
if (device_->currentAllocatedSize() + size >= gc_limit_) {
|
||||
size_t min_bytes_to_free =
|
||||
size + device_->currentAllocatedSize() - gc_limit_;
|
||||
buffer_cache_.release_cached_buffers(min_bytes_to_free);
|
||||
}
|
||||
|
||||
// Allocate new buffer if needed
|
||||
size_t res_opt = MTL::ResourceStorageModeShared;
|
||||
res_opt |= MTL::ResourceHazardTrackingModeTracked;
|
||||
|
@ -23,6 +23,7 @@ class BufferCache {
|
||||
|
||||
MTL::Buffer* reuse_from_cache(size_t size);
|
||||
void recycle_to_cache(MTL::Buffer* buf);
|
||||
void release_cached_buffers(size_t min_bytes_to_free);
|
||||
|
||||
private:
|
||||
struct BufferHolder {
|
||||
@ -65,6 +66,7 @@ class MetalAllocator : public allocator::Allocator {
|
||||
// Allocation stats
|
||||
size_t peak_allocated_size_;
|
||||
size_t block_limit_;
|
||||
size_t gc_limit_;
|
||||
};
|
||||
|
||||
MetalAllocator& allocator();
|
||||
|
Loading…
Reference in New Issue
Block a user