mirror of
https://github.com/ml-explore/mlx.git
synced 2025-10-18 15:28:16 +08:00
Support disabling the Metal buffer cache to prevent performance degradation caused by large memory caching (#390)
* Support disabling the Metal buffer cache, because a large amount of unused memory stays buffered when an LLM generates long-context tokens * Run format and add "cache_enabled" feature tests
This commit is contained in:
@@ -11,4 +11,12 @@ using namespace mlx::core;
|
||||
// Registers the "metal" submodule (docstring "mlx.metal") on the given Python
// module `m` and exposes three functions from the `metal` namespace on it:
//   - is_available
//   - cache_enabled
//   - set_cache_enabled
void init_metal(py::module_& m) {
  // The handle is deliberately not called `metal`, so it cannot be confused
  // with the `metal::` namespace qualifier used in the bindings below.
  auto metal_module = m.def_submodule("metal", "mlx.metal");

  // Availability query; bound without a docstring.
  metal_module.def("is_available", &metal::is_available);

  // Getter/setter pair for the metal buffer cache switch.
  metal_module
      .def(
          "cache_enabled",
          &metal::cache_enabled,
          "check if metal buffer cache is enabled, default is true")
      .def(
          "set_cache_enabled",
          &metal::set_cache_enabled,
          "enable or disable metal buffer cache");
}
|
||||
|
Reference in New Issue
Block a user