mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-18 01:50:16 +08:00
move memory APIs into top level mlx.core (#1982)
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
#include "mlx/backend/metal/resident.h"
|
||||
#include "mlx/memory.h"
|
||||
|
||||
#include <mach/vm_page_size.h>
|
||||
#include <unistd.h>
|
||||
@@ -323,40 +324,40 @@ MetalAllocator& allocator() {
|
||||
return *allocator_;
|
||||
}
|
||||
|
||||
} // namespace metal
|
||||
|
||||
size_t set_cache_limit(size_t limit) {
|
||||
return allocator().set_cache_limit(limit);
|
||||
return metal::allocator().set_cache_limit(limit);
|
||||
}
|
||||
size_t set_memory_limit(size_t limit) {
|
||||
return allocator().set_memory_limit(limit);
|
||||
return metal::allocator().set_memory_limit(limit);
|
||||
}
|
||||
size_t get_memory_limit() {
|
||||
return allocator().get_memory_limit();
|
||||
return metal::allocator().get_memory_limit();
|
||||
}
|
||||
size_t set_wired_limit(size_t limit) {
|
||||
if (limit >
|
||||
std::get<size_t>(device_info().at("max_recommended_working_set_size"))) {
|
||||
if (limit > std::get<size_t>(metal::device_info().at(
|
||||
"max_recommended_working_set_size"))) {
|
||||
throw std::invalid_argument(
|
||||
"[metal::set_wired_limit] Setting a wired limit larger than "
|
||||
"the maximum working set size is not allowed.");
|
||||
}
|
||||
return allocator().set_wired_limit(limit);
|
||||
return metal::allocator().set_wired_limit(limit);
|
||||
}
|
||||
size_t get_active_memory() {
|
||||
return allocator().get_active_memory();
|
||||
return metal::allocator().get_active_memory();
|
||||
}
|
||||
size_t get_peak_memory() {
|
||||
return allocator().get_peak_memory();
|
||||
return metal::allocator().get_peak_memory();
|
||||
}
|
||||
void reset_peak_memory() {
|
||||
allocator().reset_peak_memory();
|
||||
metal::allocator().reset_peak_memory();
|
||||
}
|
||||
size_t get_cache_memory() {
|
||||
return allocator().get_cache_memory();
|
||||
return metal::allocator().get_cache_memory();
|
||||
}
|
||||
void clear_cache() {
|
||||
return allocator().clear_cache();
|
||||
return metal::allocator().clear_cache();
|
||||
}
|
||||
|
||||
} // namespace metal
|
||||
|
||||
} // namespace mlx::core
|
||||
|
@@ -12,74 +12,6 @@ namespace mlx::core::metal {
|
||||
/* Check if the Metal backend is available. */
|
||||
bool is_available();
|
||||
|
||||
/* Get the actively used memory in bytes.
|
||||
*
|
||||
* Note, this will not always match memory use reported by the system because
|
||||
* it does not include cached memory buffers.
|
||||
* */
|
||||
size_t get_active_memory();
|
||||
|
||||
/* Get the peak amount of used memory in bytes.
|
||||
*
|
||||
* The maximum memory used recorded from the beginning of the program
|
||||
* execution or since the last call to reset_peak_memory.
|
||||
* */
|
||||
size_t get_peak_memory();
|
||||
|
||||
/* Reset the peak memory to zero.
|
||||
* */
|
||||
void reset_peak_memory();
|
||||
|
||||
/* Get the cache size in bytes.
|
||||
*
|
||||
* The cache includes memory not currently used that has not been returned
|
||||
* to the system allocator.
|
||||
* */
|
||||
size_t get_cache_memory();
|
||||
|
||||
/* Set the memory limit.
|
||||
* The memory limit is a guideline for the maximum amount of memory to use
|
||||
* during graph evaluation. If the memory limit is exceeded and there is no
|
||||
* more RAM (including swap when available) allocations will result in an
|
||||
* exception.
|
||||
*
|
||||
* When metal is available the memory limit defaults to 1.5 times the maximum
|
||||
* recommended working set size reported by the device.
|
||||
*
|
||||
* Returns the previous memory limit.
|
||||
* */
|
||||
size_t set_memory_limit(size_t limit);
|
||||
|
||||
/* Get the current memory limit. */
|
||||
size_t get_memory_limit();
|
||||
|
||||
/* Set the free cache limit.
|
||||
* If using more than the given limit, free memory will be reclaimed
|
||||
* from the cache on the next allocation. To disable the cache,
|
||||
* set the limit to 0.
|
||||
*
|
||||
* The cache limit defaults to the memory limit.
|
||||
*
|
||||
* Returns the previous cache limit.
|
||||
* */
|
||||
size_t set_cache_limit(size_t limit);
|
||||
|
||||
/* Clear the memory cache. */
|
||||
void clear_cache();
|
||||
|
||||
/* Set the wired size limit.
|
||||
*
|
||||
* Note, this function is only useful for macOS 15.0 or higher.
|
||||
*
|
||||
* The wired limit is the total size in bytes of memory that will be kept
|
||||
* resident. The default value is ``0``.
|
||||
*
|
||||
* Setting a wired limit larger than system wired limit is an error.
|
||||
*
|
||||
* Returns the previous wired limit.
|
||||
* */
|
||||
size_t set_wired_limit(size_t limit);
|
||||
|
||||
/** Capture a GPU trace, saving it to an absolute file `path` */
|
||||
void start_capture(std::string path = "");
|
||||
void stop_capture();
|
||||
|
@@ -2,7 +2,9 @@
|
||||
|
||||
#include "mlx/allocator.h"
|
||||
|
||||
namespace mlx::core::allocator {
|
||||
namespace mlx::core {
|
||||
|
||||
namespace allocator {
|
||||
|
||||
Allocator& allocator() {
|
||||
static CommonAllocator allocator_;
|
||||
@@ -15,5 +17,30 @@ void* Buffer::raw_ptr() {
|
||||
}
|
||||
return static_cast<size_t*>(ptr_) + 1;
|
||||
}
|
||||
} // namespace allocator
|
||||
|
||||
} // namespace mlx::core::allocator
|
||||
size_t get_active_memory() {
|
||||
return 0;
|
||||
}
|
||||
size_t get_peak_memory() {
|
||||
return 0;
|
||||
}
|
||||
void reset_peak_memory() {}
|
||||
size_t get_cache_memory() {
|
||||
return 0;
|
||||
}
|
||||
size_t set_memory_limit(size_t) {
|
||||
return 0;
|
||||
}
|
||||
size_t get_memory_limit() {
|
||||
return 0;
|
||||
}
|
||||
size_t set_cache_limit(size_t) {
|
||||
return 0;
|
||||
}
|
||||
size_t set_wired_limit(size_t) {
|
||||
return 0;
|
||||
}
|
||||
void clear_cache() {}
|
||||
|
||||
} // namespace mlx::core
|
||||
|
@@ -31,33 +31,8 @@ void synchronize(Stream) {
|
||||
"[metal::synchronize] Cannot synchronize GPU without metal backend");
|
||||
}
|
||||
|
||||
// No-ops when Metal is not available.
|
||||
size_t get_active_memory() {
|
||||
return 0;
|
||||
}
|
||||
size_t get_peak_memory() {
|
||||
return 0;
|
||||
}
|
||||
void reset_peak_memory() {}
|
||||
size_t get_cache_memory() {
|
||||
return 0;
|
||||
}
|
||||
size_t set_memory_limit(size_t) {
|
||||
return 0;
|
||||
}
|
||||
size_t get_memory_limit() {
|
||||
return 0;
|
||||
}
|
||||
size_t set_cache_limit(size_t) {
|
||||
return 0;
|
||||
}
|
||||
size_t set_wired_limit(size_t) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void start_capture(std::string) {}
|
||||
void stop_capture() {}
|
||||
void clear_cache() {}
|
||||
|
||||
const std::unordered_map<std::string, std::variant<std::string, size_t>>&
|
||||
device_info() {
|
||||
|
78
mlx/memory.h
Normal file
78
mlx/memory.h
Normal file
@@ -0,0 +1,78 @@
|
||||
// Copyright © 2025 Apple Inc.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
namespace mlx::core {
|
||||
|
||||
/* Get the actively used memory in bytes.
|
||||
*
|
||||
* Note, this will not always match memory use reported by the system because
|
||||
* it does not include cached memory buffers.
|
||||
* */
|
||||
size_t get_active_memory();
|
||||
|
||||
/* Get the peak amount of used memory in bytes.
|
||||
*
|
||||
* The maximum memory use recorded since the beginning of the program
|
||||
* execution or since the last call to reset_peak_memory.
|
||||
* */
|
||||
size_t get_peak_memory();
|
||||
|
||||
/* Reset the peak memory to zero.
|
||||
* */
|
||||
void reset_peak_memory();
|
||||
|
||||
/* Get the cache size in bytes.
|
||||
*
|
||||
* The cache includes memory not currently used that has not been returned
|
||||
* to the system allocator.
|
||||
* */
|
||||
size_t get_cache_memory();
|
||||
|
||||
/* Set the memory limit.
|
||||
* The memory limit is a guideline for the maximum amount of memory to use
|
||||
* during graph evaluation. If the memory limit is exceeded and there is no
|
||||
* more RAM (including swap when available), allocations will result in an
|
||||
* exception.
|
||||
*
|
||||
* When Metal is available, the memory limit defaults to 1.5 times the maximum
|
||||
* recommended working set size reported by the device.
|
||||
*
|
||||
* Returns the previous memory limit.
|
||||
* */
|
||||
size_t set_memory_limit(size_t limit);
|
||||
|
||||
/* Get the current memory limit. */
|
||||
size_t get_memory_limit();
|
||||
|
||||
/* Set the cache limit.
|
||||
* If using more than the given limit, free memory will be reclaimed
|
||||
* from the cache on the next allocation. To disable the cache,
|
||||
* set the limit to 0.
|
||||
*
|
||||
* The cache limit defaults to the memory limit.
|
||||
*
|
||||
* Returns the previous cache limit.
|
||||
* */
|
||||
size_t set_cache_limit(size_t limit);
|
||||
|
||||
/* Clear the memory cache. */
|
||||
void clear_cache();
|
||||
|
||||
/* Set the wired size limit.
|
||||
*
|
||||
* Note, this function is only useful when using the Metal backend with
|
||||
* macOS 15.0 or higher.
|
||||
*
|
||||
* The wired limit is the total size in bytes of memory that will be kept
|
||||
* resident. The default value is ``0``.
|
||||
*
|
||||
* Setting a wired limit larger than the system wired limit is an error.
|
||||
*
|
||||
* Returns the previous wired limit.
|
||||
* */
|
||||
size_t set_wired_limit(size_t limit);
|
||||
|
||||
} // namespace mlx::core
|
@@ -14,6 +14,7 @@
|
||||
#include "mlx/fft.h"
|
||||
#include "mlx/io.h"
|
||||
#include "mlx/linalg.h"
|
||||
#include "mlx/memory.h"
|
||||
#include "mlx/ops.h"
|
||||
#include "mlx/random.h"
|
||||
#include "mlx/stream.h"
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#include "mlx/backend/cpu/eval.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
#include "mlx/fence.h"
|
||||
#include "mlx/memory.h"
|
||||
#include "mlx/ops.h"
|
||||
#include "mlx/primitives.h"
|
||||
#include "mlx/scheduler.h"
|
||||
@@ -219,7 +220,7 @@ array eval_impl(std::vector<array> outputs, bool async) {
|
||||
}
|
||||
|
||||
if (scheduler::n_active_tasks() > MAX_ACTIVE_TASKS ||
|
||||
(metal::get_active_memory() > metal::get_memory_limit() &&
|
||||
(get_active_memory() > get_memory_limit() &&
|
||||
scheduler::n_active_tasks() > 0)) {
|
||||
// Commit any open streams
|
||||
for (auto& [_, e] : events) {
|
||||
@@ -228,8 +229,7 @@ array eval_impl(std::vector<array> outputs, bool async) {
|
||||
}
|
||||
}
|
||||
scheduler::wait_for_one();
|
||||
// TODO memory api should be moved out of metal
|
||||
while (metal::get_active_memory() > metal::get_memory_limit() &&
|
||||
while (get_active_memory() > get_memory_limit() &&
|
||||
scheduler::n_active_tasks() > 0) {
|
||||
scheduler::wait_for_one();
|
||||
}
|
||||
|
Reference in New Issue
Block a user