mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-18 18:28:12 +08:00
Support disabling the Metal buffer cache to prevent performance degradation caused by caching large amounts of memory (#390)
* Support disabling the Metal buffer cache, because a large amount of unused memory stays buffered when an LLM generates long-context tokens * Run the formatter and add "cache_enabled" feature tests
This commit is contained in:
@@ -23,6 +23,16 @@ void* Buffer::raw_ptr() {
|
||||
|
||||
namespace metal {
|
||||
|
||||
// File-local switch that MetalAllocator::free() consults to decide whether a
// freed buffer is recycled into the buffer cache or released immediately.
// Caching is on by default.
// NOTE(review): this is a plain bool with no synchronization; confirm that
// set_cache_enabled() is never called concurrently with allocator frees.
static bool cache_enabled_ = true;

// Returns the current value of the buffer-cache switch.
bool cache_enabled() {
  return cache_enabled_;
}

// Sets the buffer-cache switch. Only the flag is updated here; this function
// does not itself touch any buffers.
void set_cache_enabled(bool enabled) {
  cache_enabled_ = enabled;
}
|
||||
|
||||
namespace {
|
||||
|
||||
BufferCache::BufferCache(MTL::Device* device)
|
||||
@@ -196,7 +206,11 @@ Buffer MetalAllocator::malloc(size_t size, bool allow_swap /* = false */) {
|
||||
|
||||
// Gives a buffer back to the allocator.
//
// The underlying MTL::Buffer is handed to exactly one destination:
//   - caching enabled  -> recycled into buffer_cache_
//   - caching disabled -> released right away
//
// The buffer must not be recycled unconditionally before this branch: doing
// so would recycle it twice when caching is on, and would leave a released
// buffer inside the cache when caching is off.
void MetalAllocator::free(Buffer buffer) {
  auto buf = static_cast<MTL::Buffer*>(buffer.ptr());
  if (cache_enabled()) {
    buffer_cache_.recycle_to_cache(buf);
  } else {
    buf->release();
  }
}
|
||||
|
||||
MetalAllocator& allocator() {
|
||||
|
@@ -19,6 +19,9 @@ constexpr bool is_available() {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns true when the Metal buffer cache is enabled (the default).
bool cache_enabled();

// Enables or disables the Metal buffer cache. While disabled, the allocator
// releases freed buffers instead of recycling them into the cache.
void set_cache_enabled(bool enabled);
|
||||
|
||||
void new_stream(Stream stream);
|
||||
std::shared_ptr<void> new_scoped_memory_pool();
|
||||
|
||||
|
Reference in New Issue
Block a user