move memory APIs into top level mlx.core (#1982)

This commit is contained in:
Awni Hannun
2025-03-21 07:25:12 -07:00
committed by GitHub
parent 65a38c452b
commit 4e1994e9d7
25 changed files with 418 additions and 323 deletions

View File

@@ -17,6 +17,7 @@ nanobind_add_module(
${CMAKE_CURRENT_SOURCE_DIR}/indexing.cpp
${CMAKE_CURRENT_SOURCE_DIR}/load.cpp
${CMAKE_CURRENT_SOURCE_DIR}/metal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mlx_func.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ops.cpp
${CMAKE_CURRENT_SOURCE_DIR}/stream.cpp

125
python/src/memory.cpp Normal file
View File

@@ -0,0 +1,125 @@
// Copyright © 2025 Apple Inc.
#include "mlx/memory.h"
#include <nanobind/nanobind.h>
namespace mx = mlx::core;
namespace nb = nanobind;
using namespace nb::literals;
// Register the memory-management functions on the given module `m`.
// Each `m.def` call binds a Python-visible name to the `mx::` function of the
// same name and attaches the docstring that follows it.
void init_memory(nb::module_& m) {
// Query: active memory in bytes.
m.def(
"get_active_memory",
&mx::get_active_memory,
R"pbdoc(
Get the actively used memory in bytes.
Note, this will not always match memory use reported by the system because
it does not include cached memory buffers.
)pbdoc");
// Query: peak memory in bytes.
m.def(
"get_peak_memory",
&mx::get_peak_memory,
R"pbdoc(
Get the peak amount of used memory in bytes.
The maximum memory used recorded from the beginning of the program
execution or since the last call to :func:`reset_peak_memory`.
)pbdoc");
// Reset the peak-memory counter.
m.def(
"reset_peak_memory",
&mx::reset_peak_memory,
R"pbdoc(
Reset the peak memory to zero.
)pbdoc");
// Query: cache size in bytes.
m.def(
"get_cache_memory",
&mx::get_cache_memory,
R"pbdoc(
Get the cache size in bytes.
The cache includes memory not currently used that has not been returned
to the system allocator.
)pbdoc");
// Setter: `"limit"_a` exposes the argument under the keyword name `limit`.
m.def(
"set_memory_limit",
&mx::set_memory_limit,
"limit"_a,
R"pbdoc(
Set the memory limit.
The memory limit is a guideline for the maximum amount of memory to use
during graph evaluation. If the memory limit is exceeded and there is no
more RAM (including swap when available) allocations will result in an
exception.
When metal is available the memory limit defaults to 1.5 times the
maximum recommended working set size reported by the device.
Args:
limit (int): Memory limit in bytes.
Returns:
int: The previous memory limit in bytes.
)pbdoc");
// Setter for the cache limit; same keyword-argument convention as above.
m.def(
"set_cache_limit",
&mx::set_cache_limit,
"limit"_a,
R"pbdoc(
Set the free cache limit.
If using more than the given limit, free memory will be reclaimed
from the cache on the next allocation. To disable the cache, set
the limit to ``0``.
The cache limit defaults to the memory limit. See
:func:`set_memory_limit` for more details.
Args:
limit (int): The cache limit in bytes.
Returns:
int: The previous cache limit in bytes.
)pbdoc");
// Setter for the wired limit; same keyword-argument convention as above.
m.def(
"set_wired_limit",
&mx::set_wired_limit,
"limit"_a,
R"pbdoc(
Set the wired size limit.
.. note::
* This function is only useful on macOS 15.0 or higher.
* The wired limit should remain strictly less than the total
memory size.
The wired limit is the total size in bytes of memory that will be kept
resident. The default value is ``0``.
Setting a wired limit larger than system wired limit is an error. You can
increase the system wired limit with:
.. code-block::
sudo sysctl iogpu.wired_limit_mb=<size_in_megabytes>
Use :func:`device_info` to query the system wired limit
(``"max_recommended_working_set_size"``) and the total memory size
(``"memory_size"``).
Args:
limit (int): The wired limit in bytes.
Returns:
int: The previous wired limit in bytes.
)pbdoc");
// Drop the memory cache.
m.def(
"clear_cache",
&mx::clear_cache,
R"pbdoc(
Clear the memory cache.
After calling this, :func:`get_cache_memory` should return ``0``.
)pbdoc");
}

View File

@@ -1,17 +1,27 @@
// Copyright © 2023-2024 Apple Inc.
#include <iostream>
#include "mlx/backend/metal/metal.h"
#include <nanobind/nanobind.h>
#include <nanobind/stl/optional.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/unordered_map.h>
#include <nanobind/stl/variant.h>
#include <nanobind/stl/vector.h>
#include "mlx/backend/metal/metal.h"
#include "mlx/memory.h"
namespace mx = mlx::core;
namespace nb = nanobind;
using namespace nb::literals;
// Write a one-line deprecation notice to stderr naming the deprecated function
// `old_fn` and its replacement `new_fn`.
//
// Always returns true, so the call can be used as the initializer of a flag
// variable (see the DEPRECATE macro).
//
// Both names are taken by const reference; the previous by-value `new_fn`
// copied the string on every call for no benefit.
bool DEPRECATE(const std::string& old_fn, const std::string& new_fn) {
  std::cerr << old_fn << " is deprecated and will be removed in a future "
            << "version. Use " << new_fn << " instead." << std::endl;
  return true;
}
// Emit the deprecation notice only once per use site: the expansion declares a
// `static bool dep`, which is initialized (and therefore prints) only the
// first time control passes through it. The inner DEPRECATE is not re-expanded
// by the preprocessor, so it resolves to the function of the same name.
// Because the expansion declares a variable named `dep`, use this at most once
// per scope.
#define DEPRECATE(oldfn, newfn) static bool dep = DEPRECATE(oldfn, newfn)
void init_metal(nb::module_& m) {
nb::module_ metal = m.def_submodule("metal", "mlx.metal");
metal.def(
@@ -20,121 +30,47 @@ void init_metal(nb::module_& m) {
R"pbdoc(
Check if the Metal back-end is available.
)pbdoc");
metal.def(
"get_active_memory",
&mx::metal::get_active_memory,
R"pbdoc(
Get the actively used memory in bytes.
Note, this will not always match memory use reported by the system because
it does not include cached memory buffers.
)pbdoc");
metal.def(
"get_peak_memory",
&mx::metal::get_peak_memory,
R"pbdoc(
Get the peak amount of used memory in bytes.
The maximum memory used recorded from the beginning of the program
execution or since the last call to :func:`reset_peak_memory`.
)pbdoc");
metal.def(
"reset_peak_memory",
&mx::metal::reset_peak_memory,
R"pbdoc(
Reset the peak memory to zero.
)pbdoc");
metal.def(
"get_cache_memory",
&mx::metal::get_cache_memory,
R"pbdoc(
Get the cache size in bytes.
The cache includes memory not currently used that has not been returned
to the system allocator.
)pbdoc");
// Deprecated alias on the `metal` submodule: prints a deprecation notice via
// DEPRECATE, then forwards to mx::get_active_memory.
metal.def("get_active_memory", []() {
DEPRECATE("mx.metal.get_active_memory", "mx.get_active_memory");
return mx::get_active_memory();
});
// Deprecated alias on the `metal` submodule: prints a deprecation notice via
// DEPRECATE, then forwards to mx::get_peak_memory.
metal.def("get_peak_memory", []() {
  DEPRECATE("mx.metal.get_peak_memory", "mx.get_peak_memory");
  // Must forward to the peak counter; this shim previously returned
  // mx::get_active_memory() by mistake.
  return mx::get_peak_memory();
});
// Deprecated alias on the `metal` submodule: prints a deprecation notice via
// DEPRECATE, then forwards to mx::reset_peak_memory. Returns nothing.
metal.def("reset_peak_memory", []() {
DEPRECATE("mx.metal.reset_peak_memory", "mx.reset_peak_memory");
mx::reset_peak_memory();
});
// Deprecated alias on the `metal` submodule: prints a deprecation notice via
// DEPRECATE, then forwards to mx::get_cache_memory.
metal.def("get_cache_memory", []() {
DEPRECATE("mx.metal.get_cache_memory", "mx.get_cache_memory");
return mx::get_cache_memory();
});
metal.def(
"set_memory_limit",
&mx::metal::set_memory_limit,
"limit"_a,
R"pbdoc(
Set the memory limit.
The memory limit is a guideline for the maximum amount of memory to use
during graph evaluation. If the memory limit is exceeded and there is no
more RAM (including swap when available) allocations will result in an
exception.
When metal is available the memory limit defaults to 1.5 times the
maximum recommended working set size reported by the device.
Args:
limit (int): Memory limit in bytes.
Returns:
int: The previous memory limit in bytes.
)pbdoc");
// Deprecated shim: prints a deprecation notice, then forwards `limit` to
// mx::set_memory_limit and returns its result.
[](size_t limit) {
  // The old name was misspelled ("set_memory_limt") in the notice.
  DEPRECATE("mx.metal.set_memory_limit", "mx.set_memory_limit");
  return mx::set_memory_limit(limit);
},
"limit"_a);
metal.def(
"set_cache_limit",
&mx::metal::set_cache_limit,
"limit"_a,
R"pbdoc(
Set the free cache limit.
If using more than the given limit, free memory will be reclaimed
from the cache on the next allocation. To disable the cache, set
the limit to ``0``.
The cache limit defaults to the memory limit. See
:func:`set_memory_limit` for more details.
Args:
limit (int): The cache limit in bytes.
Returns:
int: The previous cache limit in bytes.
)pbdoc");
// Deprecated shim: prints a deprecation notice, then forwards `limit` to
// mx::set_cache_limit and returns its result.
[](size_t limit) {
  // The old name was misspelled ("set_cache_limt") in the notice.
  DEPRECATE("mx.metal.set_cache_limit", "mx.set_cache_limit");
  return mx::set_cache_limit(limit);
},
"limit"_a);
metal.def(
"set_wired_limit",
&mx::metal::set_wired_limit,
"limit"_a,
R"pbdoc(
Set the wired size limit.
.. note::
* This function is only useful on macOS 15.0 or higher.
* The wired limit should remain strictly less than the total
memory size.
The wired limit is the total size in bytes of memory that will be kept
resident. The default value is ``0``.
Setting a wired limit larger than system wired limit is an error. You can
increase the system wired limit with:
.. code-block::
sudo sysctl iogpu.wired_limit_mb=<size_in_megabytes>
Use :func:`device_info` to query the system wired limit
(``"max_recommended_working_set_size"``) and the total memory size
(``"memory_size"``).
Args:
limit (int): The wired limit in bytes.
Returns:
int: The previous wired limit in bytes.
)pbdoc");
metal.def(
"clear_cache",
&mx::metal::clear_cache,
R"pbdoc(
Clear the memory cache.
After calling this, :func:`get_cache_memory` should return ``0``.
)pbdoc");
// Deprecated shim: prints a deprecation notice, then forwards `limit` to
// mx::set_wired_limit and returns its result.
[](size_t limit) {
  // The old name was misspelled ("set_wired_limt") in the notice.
  DEPRECATE("mx.metal.set_wired_limit", "mx.set_wired_limit");
  return mx::set_wired_limit(limit);
},
"limit"_a);
// Deprecated alias on the `metal` submodule: prints a deprecation notice via
// DEPRECATE, then forwards to mx::clear_cache. Returns nothing.
metal.def("clear_cache", []() {
DEPRECATE("mx.metal.clear_cache", "mx.clear_cache");
mx::clear_cache();
});
metal.def(
"start_capture",
&mx::metal::start_capture,

View File

@@ -12,6 +12,7 @@ void init_array(nb::module_&);
void init_device(nb::module_&);
void init_stream(nb::module_&);
void init_metal(nb::module_&);
void init_memory(nb::module_&);
void init_ops(nb::module_&);
void init_transforms(nb::module_&);
void init_random(nb::module_&);
@@ -34,6 +35,7 @@ NB_MODULE(core, m) {
init_stream(m);
init_array(m);
init_metal(m);
init_memory(m);
init_ops(m);
init_transforms(m);
init_random(m);