Support disabling the Metal buffer cache to prevent performance degradation caused by large memory caching (#390)

* Support disabling the Metal buffer cache, because a large amount of unused memory stays cached when an LLM generates long-context tokens

* Run format and add "cache_enabled" feature tests
This commit is contained in:
Ethan
2024-01-19 00:33:34 +08:00
committed by GitHub
parent 49a52610b7
commit a749a91c75
4 changed files with 67 additions and 1 deletions

View File

@@ -23,6 +23,16 @@ void* Buffer::raw_ptr() {
namespace metal {
// File-local switch consulted by MetalAllocator::free: when true, freed
// buffers are recycled into the buffer cache; when false they are released.
// Starts out enabled.
static bool cache_enabled_{true};

// Returns the current value of the buffer-cache switch.
bool cache_enabled() {
  return cache_enabled_;
}

// Sets the buffer-cache switch to `enabled`.
void set_cache_enabled(bool enabled) {
  cache_enabled_ = enabled;
}
namespace {
BufferCache::BufferCache(MTL::Device* device)
@@ -196,7 +206,11 @@ Buffer MetalAllocator::malloc(size_t size, bool allow_swap /* = false */) {
// Gives `buffer` back to the allocator. When the buffer cache is enabled the
// underlying MTL::Buffer is recycled into the cache; when it is disabled the
// buffer is released immediately instead.
void MetalAllocator::free(Buffer buffer) {
  auto buf = static_cast<MTL::Buffer*>(buffer.ptr());
  // Take exactly one of the two paths. An unconditional recycle ahead of this
  // branch would hand the same buffer to the cache twice, or release a buffer
  // that the cache was just given.
  if (cache_enabled()) {
    buffer_cache_.recycle_to_cache(buf);
  } else {
    buf->release();
  }
}
MetalAllocator& allocator() {

View File

@@ -19,6 +19,9 @@ constexpr bool is_available() {
#endif
}
// Reports whether the Metal buffer cache is currently enabled.
bool cache_enabled();

// Turns the Metal buffer cache on (`true`) or off (`false`).
void set_cache_enabled(bool enabled);
void new_stream(Stream stream);
std::shared_ptr<void> new_scoped_memory_pool();