mirror of
https://github.com/ml-explore/mlx.git
synced 2025-10-19 00:04:41 +08:00
move memory APIs into top level mlx.core (#1982)
This commit is contained in:
@@ -17,6 +17,7 @@ nanobind_add_module(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/indexing.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/load.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/metal.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mlx_func.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ops.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/stream.cpp
|
||||
|
125
python/src/memory.cpp
Normal file
125
python/src/memory.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
// Copyright © 2025 Apple Inc.
|
||||
|
||||
#include "mlx/memory.h"
|
||||
#include <nanobind/nanobind.h>
|
||||
|
||||
namespace mx = mlx::core;
|
||||
namespace nb = nanobind;
|
||||
using namespace nb::literals;
|
||||
|
||||
// Registers the memory-management API directly on the module `m`.
//
// Eight functions are exposed, each bound to the `mx::` function of the same
// name: get_active_memory, get_peak_memory, reset_peak_memory,
// get_cache_memory, set_memory_limit, set_cache_limit, set_wired_limit and
// clear_cache. The three setters accept their argument under the keyword
// `limit`.
void init_memory(nb::module_& m) {
  // Docstrings are named up front so that the registrations at the bottom read
  // as a compact name -> function table.
  const char* const active_memory_doc = R"pbdoc(
      Get the actively used memory in bytes.

      Note, this will not always match memory use reported by the system because
      it does not include cached memory buffers.
      )pbdoc";

  const char* const peak_memory_doc = R"pbdoc(
      Get the peak amount of used memory in bytes.

      The maximum memory used recorded from the beginning of the program
      execution or since the last call to :func:`reset_peak_memory`.
      )pbdoc";

  const char* const reset_peak_doc = R"pbdoc(
      Reset the peak memory to zero.
      )pbdoc";

  const char* const cache_memory_doc = R"pbdoc(
      Get the cache size in bytes.

      The cache includes memory not currently used that has not been returned
      to the system allocator.
      )pbdoc";

  const char* const memory_limit_doc = R"pbdoc(
      Set the memory limit.

      The memory limit is a guideline for the maximum amount of memory to use
      during graph evaluation. If the memory limit is exceeded and there is no
      more RAM (including swap when available) allocations will result in an
      exception.

      When metal is available the memory limit defaults to 1.5 times the
      maximum recommended working set size reported by the device.

      Args:
        limit (int): Memory limit in bytes.

      Returns:
        int: The previous memory limit in bytes.
      )pbdoc";

  const char* const cache_limit_doc = R"pbdoc(
      Set the free cache limit.

      If using more than the given limit, free memory will be reclaimed
      from the cache on the next allocation. To disable the cache, set
      the limit to ``0``.

      The cache limit defaults to the memory limit. See
      :func:`set_memory_limit` for more details.

      Args:
        limit (int): The cache limit in bytes.

      Returns:
        int: The previous cache limit in bytes.
      )pbdoc";

  const char* const wired_limit_doc = R"pbdoc(
      Set the wired size limit.

      .. note::
         * This function is only useful on macOS 15.0 or higher.
         * The wired limit should remain strictly less than the total
           memory size.

      The wired limit is the total size in bytes of memory that will be kept
      resident. The default value is ``0``.

      Setting a wired limit larger than system wired limit is an error. You can
      increase the system wired limit with:

      .. code-block::

        sudo sysctl iogpu.wired_limit_mb=<size_in_megabytes>

      Use :func:`device_info` to query the system wired limit
      (``"max_recommended_working_set_size"``) and the total memory size
      (``"memory_size"``).

      Args:
        limit (int): The wired limit in bytes.

      Returns:
        int: The previous wired limit in bytes.
      )pbdoc";

  const char* const clear_cache_doc = R"pbdoc(
      Clear the memory cache.

      After calling this, :func:`get_cache_memory` should return ``0``.
      )pbdoc";

  // Queries and resets: no arguments.
  m.def("get_active_memory", &mx::get_active_memory, active_memory_doc);
  m.def("get_peak_memory", &mx::get_peak_memory, peak_memory_doc);
  m.def("reset_peak_memory", &mx::reset_peak_memory, reset_peak_doc);
  m.def("get_cache_memory", &mx::get_cache_memory, cache_memory_doc);

  // Limit setters: a single argument exposed under the keyword `limit`.
  m.def("set_memory_limit", &mx::set_memory_limit, "limit"_a, memory_limit_doc);
  m.def("set_cache_limit", &mx::set_cache_limit, "limit"_a, cache_limit_doc);
  m.def("set_wired_limit", &mx::set_wired_limit, "limit"_a, wired_limit_doc);

  m.def("clear_cache", &mx::clear_cache, clear_cache_doc);
}
|
@@ -1,17 +1,27 @@
|
||||
// Copyright © 2023-2024 Apple Inc.
|
||||
#include <iostream>
|
||||
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include <nanobind/nanobind.h>
|
||||
#include <nanobind/stl/optional.h>
|
||||
#include <nanobind/stl/string.h>
|
||||
#include <nanobind/stl/unordered_map.h>
|
||||
#include <nanobind/stl/variant.h>
|
||||
#include <nanobind/stl/vector.h>
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/memory.h"
|
||||
|
||||
namespace mx = mlx::core;
|
||||
namespace nb = nanobind;
|
||||
using namespace nb::literals;
|
||||
|
||||
// Writes a deprecation notice for `old_fn` to stderr, naming `new_fn` as the
// replacement, and returns true.
//
// The return value carries no information; it exists so the call can serve as
// the initializer of a variable (see the DEPRECATE macro in this file).
//
// Both names are taken by const reference: they are only streamed, never
// stored, so there is no reason to copy either string.
bool DEPRECATE(const std::string& old_fn, const std::string& new_fn) {
  std::cerr << old_fn << " is deprecated and will be removed in a future "
            << "version. Use " << new_fn << " instead." << std::endl;
  return true;
}
|
||||
|
||||
// Function-like macro that intentionally shadows the DEPRECATE function above.
// It expands to a block-scope `static bool dep` whose initializer calls that
// function. A static local is initialized only the first time control reaches
// its declaration, so each expansion site prints its warning once rather than
// on every call. The inner DEPRECATE is not expanded again because a macro is
// never re-expanded inside its own replacement list.
// Use at most once per scope: every expansion declares the same name `dep`.
#define DEPRECATE(oldfn, newfn) static bool dep = DEPRECATE(oldfn, newfn)
|
||||
|
||||
void init_metal(nb::module_& m) {
|
||||
nb::module_ metal = m.def_submodule("metal", "mlx.metal");
|
||||
metal.def(
|
||||
@@ -20,121 +30,47 @@ void init_metal(nb::module_& m) {
|
||||
R"pbdoc(
|
||||
Check if the Metal back-end is available.
|
||||
)pbdoc");
|
||||
metal.def(
|
||||
"get_active_memory",
|
||||
&mx::metal::get_active_memory,
|
||||
R"pbdoc(
|
||||
Get the actively used memory in bytes.
|
||||
|
||||
Note, this will not always match memory use reported by the system because
|
||||
it does not include cached memory buffers.
|
||||
)pbdoc");
|
||||
metal.def(
|
||||
"get_peak_memory",
|
||||
&mx::metal::get_peak_memory,
|
||||
R"pbdoc(
|
||||
Get the peak amount of used memory in bytes.
|
||||
|
||||
The maximum memory used recorded from the beginning of the program
|
||||
execution or since the last call to :func:`reset_peak_memory`.
|
||||
)pbdoc");
|
||||
metal.def(
|
||||
"reset_peak_memory",
|
||||
&mx::metal::reset_peak_memory,
|
||||
R"pbdoc(
|
||||
Reset the peak memory to zero.
|
||||
)pbdoc");
|
||||
metal.def(
|
||||
"get_cache_memory",
|
||||
&mx::metal::get_cache_memory,
|
||||
R"pbdoc(
|
||||
Get the cache size in bytes.
|
||||
|
||||
The cache includes memory not currently used that has not been returned
|
||||
to the system allocator.
|
||||
)pbdoc");
|
||||
metal.def("get_active_memory", []() {
|
||||
DEPRECATE("mx.metal.get_active_memory", "mx.get_active_memory");
|
||||
return mx::get_active_memory();
|
||||
});
|
||||
// Deprecated alias kept for backward compatibility: warns through DEPRECATE
// and forwards to the top-level peak-memory query. It previously forwarded to
// mx::get_active_memory(), so callers got the current usage, not the peak.
metal.def("get_peak_memory", []() {
  DEPRECATE("mx.metal.get_peak_memory", "mx.get_peak_memory");
  return mx::get_peak_memory();
});
|
||||
metal.def("reset_peak_memory", []() {
|
||||
DEPRECATE("mx.metal.reset_peak_memory", "mx.reset_peak_memory");
|
||||
mx::reset_peak_memory();
|
||||
});
|
||||
metal.def("get_cache_memory", []() {
|
||||
DEPRECATE("mx.metal.get_cache_memory", "mx.get_cache_memory");
|
||||
return mx::get_cache_memory();
|
||||
});
|
||||
metal.def(
|
||||
"set_memory_limit",
|
||||
&mx::metal::set_memory_limit,
|
||||
"limit"_a,
|
||||
R"pbdoc(
|
||||
Set the memory limit.
|
||||
|
||||
The memory limit is a guideline for the maximum amount of memory to use
|
||||
during graph evaluation. If the memory limit is exceeded and there is no
|
||||
more RAM (including swap when available) allocations will result in an
|
||||
exception.
|
||||
|
||||
When metal is available the memory limit defaults to 1.5 times the
|
||||
maximum recommended working set size reported by the device.
|
||||
|
||||
Args:
|
||||
limit (int): Memory limit in bytes.
|
||||
|
||||
Returns:
|
||||
int: The previous memory limit in bytes.
|
||||
)pbdoc");
|
||||
// Deprecated alias: warns through DEPRECATE, then forwards `limit` to the
// top-level setter. The old name in the warning must match the name this
// wrapper is registered under ("set_memory_limit").
[](size_t limit) {
  DEPRECATE("mx.metal.set_memory_limit", "mx.set_memory_limit");
  return mx::set_memory_limit(limit);
},
|
||||
"limit"_a);
|
||||
metal.def(
|
||||
"set_cache_limit",
|
||||
&mx::metal::set_cache_limit,
|
||||
"limit"_a,
|
||||
R"pbdoc(
|
||||
Set the free cache limit.
|
||||
|
||||
If using more than the given limit, free memory will be reclaimed
|
||||
from the cache on the next allocation. To disable the cache, set
|
||||
the limit to ``0``.
|
||||
|
||||
The cache limit defaults to the memory limit. See
|
||||
:func:`set_memory_limit` for more details.
|
||||
|
||||
Args:
|
||||
limit (int): The cache limit in bytes.
|
||||
|
||||
Returns:
|
||||
int: The previous cache limit in bytes.
|
||||
)pbdoc");
|
||||
// Deprecated alias: warns through DEPRECATE, then forwards `limit` to the
// top-level setter. The old name in the warning must match the name this
// wrapper is registered under ("set_cache_limit").
[](size_t limit) {
  DEPRECATE("mx.metal.set_cache_limit", "mx.set_cache_limit");
  return mx::set_cache_limit(limit);
},
|
||||
"limit"_a);
|
||||
metal.def(
|
||||
"set_wired_limit",
|
||||
&mx::metal::set_wired_limit,
|
||||
"limit"_a,
|
||||
R"pbdoc(
|
||||
Set the wired size limit.
|
||||
|
||||
.. note::
|
||||
* This function is only useful on macOS 15.0 or higher.
|
||||
* The wired limit should remain strictly less than the total
|
||||
memory size.
|
||||
|
||||
The wired limit is the total size in bytes of memory that will be kept
|
||||
resident. The default value is ``0``.
|
||||
|
||||
Setting a wired limit larger than system wired limit is an error. You can
|
||||
increase the system wired limit with:
|
||||
|
||||
.. code-block::
|
||||
|
||||
sudo sysctl iogpu.wired_limit_mb=<size_in_megabytes>
|
||||
|
||||
Use :func:`device_info` to query the system wired limit
|
||||
(``"max_recommended_working_set_size"``) and the total memory size
|
||||
(``"memory_size"``).
|
||||
|
||||
Args:
|
||||
limit (int): The wired limit in bytes.
|
||||
|
||||
Returns:
|
||||
int: The previous wired limit in bytes.
|
||||
)pbdoc");
|
||||
metal.def(
|
||||
"clear_cache",
|
||||
&mx::metal::clear_cache,
|
||||
R"pbdoc(
|
||||
Clear the memory cache.
|
||||
|
||||
After calling this, :func:`get_cache_memory` should return ``0``.
|
||||
)pbdoc");
|
||||
|
||||
// Deprecated alias: warns through DEPRECATE, then forwards `limit` to the
// top-level setter. The old name in the warning must match the name this
// wrapper is registered under ("set_wired_limit").
[](size_t limit) {
  DEPRECATE("mx.metal.set_wired_limit", "mx.set_wired_limit");
  return mx::set_wired_limit(limit);
},
|
||||
"limit"_a);
|
||||
metal.def("clear_cache", []() {
|
||||
DEPRECATE("mx.metal.clear_cache", "mx.clear_cache");
|
||||
mx::clear_cache();
|
||||
});
|
||||
metal.def(
|
||||
"start_capture",
|
||||
&mx::metal::start_capture,
|
||||
|
@@ -12,6 +12,7 @@ void init_array(nb::module_&);
|
||||
void init_device(nb::module_&);
|
||||
void init_stream(nb::module_&);
|
||||
void init_metal(nb::module_&);
|
||||
void init_memory(nb::module_&);
|
||||
void init_ops(nb::module_&);
|
||||
void init_transforms(nb::module_&);
|
||||
void init_random(nb::module_&);
|
||||
@@ -34,6 +35,7 @@ NB_MODULE(core, m) {
|
||||
init_stream(m);
|
||||
init_array(m);
|
||||
init_metal(m);
|
||||
init_memory(m);
|
||||
init_ops(m);
|
||||
init_transforms(m);
|
||||
init_random(m);
|
||||
|
Reference in New Issue
Block a user