Support disabling the Metal buffer cache to prevent performance degradation caused by caching large amounts of memory (#390)

* Support disabling the Metal buffer cache, because a large amount of unused memory stays buffered when an LLM generates long-context tokens

* Run format and add "cache_enabled" feature tests
This commit is contained in:
Ethan
2024-01-19 00:33:34 +08:00
committed by GitHub
parent 49a52610b7
commit a749a91c75
4 changed files with 67 additions and 1 deletions

View File

@@ -11,4 +11,12 @@ using namespace mlx::core;
// Registers the "metal" submodule on the Python module `m` and attaches the
// Metal bindings to it:
//   - is_available      -> metal::is_available
//   - cache_enabled     -> metal::cache_enabled
//   - set_cache_enabled -> metal::set_cache_enabled
void init_metal(py::module_& m) {
  // Handle to the newly created submodule; its docstring is "mlx.metal".
  py::module_ submodule = m.def_submodule("metal", "mlx.metal");

  // Each def() returns the module itself, so the registrations are chained
  // in the same order as before.
  submodule.def("is_available", &metal::is_available)
      .def(
          "cache_enabled",
          &metal::cache_enabled,
          "check if metal buffer cache is enabled, default is true")
      .def(
          "set_cache_enabled",
          &metal::set_cache_enabled,
          "enable or disable metal buffer cache");
}