move memory APIs into top level mlx.core (#1982)

2025-12-16 01:49:05 +08:00 · 2025-03-21 07:25:12 -07:00
parent 65a38c452b
commit 4e1994e9d7
25 changed files with 418 additions and 323 deletions
--- a/mlx/backend/metal/allocator.cpp
+++ b/mlx/backend/metal/allocator.cpp
@@ -3,6 +3,7 @@
 #include "mlx/backend/metal/metal.h"
 #include "mlx/backend/metal/metal_impl.h"
 #include "mlx/backend/metal/resident.h"
+#include "mlx/memory.h"

 #include <mach/vm_page_size.h>
 #include <unistd.h>
@@ -323,40 +324,40 @@ MetalAllocator& allocator() {
  return *allocator_;
 }

+} // namespace metal
+
 size_t set_cache_limit(size_t limit) {
-  return allocator().set_cache_limit(limit);
+  return metal::allocator().set_cache_limit(limit);
 }
 size_t set_memory_limit(size_t limit) {
-  return allocator().set_memory_limit(limit);
+  return metal::allocator().set_memory_limit(limit);
 }
 size_t get_memory_limit() {
-  return allocator().get_memory_limit();
+  return metal::allocator().get_memory_limit();
 }
 size_t set_wired_limit(size_t limit) {
-  if (limit >
-      std::get<size_t>(device_info().at("max_recommended_working_set_size"))) {
+  if (limit > std::get<size_t>(metal::device_info().at(
+                  "max_recommended_working_set_size"))) {
    throw std::invalid_argument(
        "[metal::set_wired_limit] Setting a wired limit larger than "
        "the maximum working set size is not allowed.");
  }
-  return allocator().set_wired_limit(limit);
+  return metal::allocator().set_wired_limit(limit);
 }
 size_t get_active_memory() {
-  return allocator().get_active_memory();
+  return metal::allocator().get_active_memory();
 }
 size_t get_peak_memory() {
-  return allocator().get_peak_memory();
+  return metal::allocator().get_peak_memory();
 }
 void reset_peak_memory() {
-  allocator().reset_peak_memory();
+  metal::allocator().reset_peak_memory();
 }
 size_t get_cache_memory() {
-  return allocator().get_cache_memory();
+  return metal::allocator().get_cache_memory();
 }
 void clear_cache() {
-  return allocator().clear_cache();
+  return metal::allocator().clear_cache();
 }

-} // namespace metal
-
 } // namespace mlx::core
--- a/mlx/backend/metal/metal.h
+++ b/mlx/backend/metal/metal.h
@@ -12,74 +12,6 @@ namespace mlx::core::metal {
 /* Check if the Metal backend is available. */
 bool is_available();

-/* Get the actively used memory in bytes.
- *
- * Note, this will not always match memory use reported by the system because
- * it does not include cached memory buffers.
- * */
-size_t get_active_memory();
-
-/* Get the peak amount of used memory in bytes.
- *
- * The maximum memory used recorded from the beginning of the program
- * execution or since the last call to reset_peak_memory.
- * */
-size_t get_peak_memory();
-
-/* Reset the peak memory to zero.
- * */
-void reset_peak_memory();
-
-/* Get the cache size in bytes.
- *
- * The cache includes memory not currently used that has not been returned
- * to the system allocator.
- * */
-size_t get_cache_memory();
-
-/* Set the memory limit.
- * The memory limit is a guideline for the maximum amount of memory to use
- * during graph evaluation. If the memory limit is exceeded and there is no
- * more RAM (including swap when available) allocations will result in an
- * exception.
- *
- * When metal is available the memory limit defaults to 1.5 times the maximum
- * recommended working set size reported by the device.
- *
- * Returns the previous memory limit.
- * */
-size_t set_memory_limit(size_t limit);
-
-/* Get the current memory limit. */
-size_t get_memory_limit();
-
-/* Set the free cache limit.
- * If using more than the given limit, free memory will be reclaimed
- * from the cache on the next allocation. To disable the cache,
- * set the limit to 0.
- *
- * The cache limit defaults to the memory limit.
- *
- * Returns the previous cache limit.
- * */
-size_t set_cache_limit(size_t limit);
-
-/* Clear the memory cache. */
-void clear_cache();
-
-/* Set the wired size limit.
- *
- * Note, this function is only useful for macOS 15.0 or higher.
- *
- * The wired limit is the total size in bytes of memory that will be kept
- * resident. The default value is ``0``.
- *
- * Setting a wired limit larger than system wired limit is an error.
- *
- * Returns the previous wired limit.
- * */
-size_t set_wired_limit(size_t limit);
-
 /** Capture a GPU trace, saving it to an absolute file `path` */
 void start_capture(std::string path = "");
 void stop_capture();
--- a/mlx/backend/no_metal/allocator.cpp
+++ b/mlx/backend/no_metal/allocator.cpp
@@ -2,7 +2,9 @@

 #include "mlx/allocator.h"

-namespace mlx::core::allocator {
+namespace mlx::core {
+
+namespace allocator {

 Allocator& allocator() {
  static CommonAllocator allocator_;
@@ -15,5 +17,30 @@ void* Buffer::raw_ptr() {
  }
  return static_cast<size_t*>(ptr_) + 1;
 }
+} // namespace allocator

-} // namespace mlx::core::allocator
+size_t get_active_memory() {
+  return 0;
+}
+size_t get_peak_memory() {
+  return 0;
+}
+void reset_peak_memory() {}
+size_t get_cache_memory() {
+  return 0;
+}
+size_t set_memory_limit(size_t) {
+  return 0;
+}
+size_t get_memory_limit() {
+  return 0;
+}
+size_t set_cache_limit(size_t) {
+  return 0;
+}
+size_t set_wired_limit(size_t) {
+  return 0;
+}
+void clear_cache() {}
+
+} // namespace mlx::core
--- a/mlx/backend/no_metal/metal.cpp
+++ b/mlx/backend/no_metal/metal.cpp
@@ -31,33 +31,8 @@ void synchronize(Stream) {
      "[metal::synchronize] Cannot synchronize GPU without metal backend");
 }

-// No-ops when Metal is not available.
-size_t get_active_memory() {
-  return 0;
-}
-size_t get_peak_memory() {
-  return 0;
-}
-void reset_peak_memory() {}
-size_t get_cache_memory() {
-  return 0;
-}
-size_t set_memory_limit(size_t) {
-  return 0;
-}
-size_t get_memory_limit() {
-  return 0;
-}
-size_t set_cache_limit(size_t) {
-  return 0;
-}
-size_t set_wired_limit(size_t) {
-  return 0;
-}
-
 void start_capture(std::string) {}
 void stop_capture() {}
-void clear_cache() {}

 const std::unordered_map<std::string, std::variant<std::string, size_t>>&
 device_info() {
--- a/mlx/memory.h
+++ b/mlx/memory.h
@@ -0,0 +1,78 @@
+// Copyright © 2025 Apple Inc.
+
+#pragma once
+
+#include <cstdlib>
+
+namespace mlx::core {
+
+/* Get the actively used memory in bytes.
+ *
+ * Note, this will not always match memory use reported by the system because
+ * it does not include cached memory buffers.
+ * */
+size_t get_active_memory();
+
+/* Get the peak amount of used memory in bytes.
+ *
+ * The maximum memory used recorded from the beginning of the program
+ * execution or since the last call to reset_peak_memory.
+ * */
+size_t get_peak_memory();
+
+/* Reset the peak memory to zero.
+ * */
+void reset_peak_memory();
+
+/* Get the cache size in bytes.
+ *
+ * The cache includes memory not currently used that has not been returned
+ * to the system allocator.
+ * */
+size_t get_cache_memory();
+
+/* Set the memory limit.
+ * The memory limit is a guideline for the maximum amount of memory to use
+ * during graph evaluation. If the memory limit is exceeded and there is no
+ * more RAM (including swap when available), allocations will result in an
+ * exception.
+ *
+ * When Metal is available the memory limit defaults to 1.5 times the maximum
+ * recommended working set size reported by the device.
+ *
+ * Returns the previous memory limit.
+ * */
+size_t set_memory_limit(size_t limit);
+
+/* Get the current memory limit. */
+size_t get_memory_limit();
+
+/* Set the cache limit.
+ * If the cache uses more than the given limit, free memory will be
+ * reclaimed from the cache on the next allocation. To disable the cache,
+ * set the limit to 0.
+ *
+ * The cache limit defaults to the memory limit.
+ *
+ * Returns the previous cache limit.
+ * */
+size_t set_cache_limit(size_t limit);
+
+/* Clear the memory cache. */
+void clear_cache();
+
+/* Set the wired size limit.
+ *
+ * Note, this function is only useful when using the Metal backend with
+ * macOS 15.0 or higher.
+ *
+ * The wired limit is the total size in bytes of memory that will be kept
+ * resident. The default value is ``0``.
+ *
+ * Setting a wired limit larger than the system wired limit is an error.
+ *
+ * Returns the previous wired limit.
+ * */
+size_t set_wired_limit(size_t limit);
+
+} // namespace mlx::core
--- a/mlx/mlx.h
+++ b/mlx/mlx.h
@@ -14,6 +14,7 @@
 #include "mlx/fft.h"
 #include "mlx/io.h"
 #include "mlx/linalg.h"
+#include "mlx/memory.h"
 #include "mlx/ops.h"
 #include "mlx/random.h"
 #include "mlx/stream.h"
--- a/mlx/transforms.cpp
+++ b/mlx/transforms.cpp
@@ -12,6 +12,7 @@
 #include "mlx/backend/cpu/eval.h"
 #include "mlx/backend/metal/metal_impl.h"
 #include "mlx/fence.h"
+#include "mlx/memory.h"
 #include "mlx/ops.h"
 #include "mlx/primitives.h"
 #include "mlx/scheduler.h"
@@ -219,7 +220,7 @@ array eval_impl(std::vector<array> outputs, bool async) {
    }

    if (scheduler::n_active_tasks() > MAX_ACTIVE_TASKS ||
-        (metal::get_active_memory() > metal::get_memory_limit() &&
+        (get_active_memory() > get_memory_limit() &&
         scheduler::n_active_tasks() > 0)) {
      // Commit any open streams
      for (auto& [_, e] : events) {
@@ -228,8 +229,7 @@ array eval_impl(std::vector<array> outputs, bool async) {
        }
      }
      scheduler::wait_for_one();
-      // TODO memory api should be moved out of metal
-      while (metal::get_active_memory() > metal::get_memory_limit() &&
+      while (get_active_memory() > get_memory_limit() &&
             scheduler::n_active_tasks() > 0) {
        scheduler::wait_for_one();
      }