mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-18 18:28:12 +08:00
Improve profiling with gpu tracing (#969)
* improve profiling with gpu tracing
* fix for linux
* nit
* doc fix
* fix example
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
// Copyright © 2023-2024 Apple Inc.
|
||||
#include "mlx/backend/metal/allocator.h"
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
|
||||
#include <mach/vm_page_size.h>
|
||||
#include <unistd.h>
|
||||
|
@@ -11,6 +11,7 @@
|
||||
|
||||
#include "mlx/backend/metal/device.h"
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
#include "mlx/backend/metal/mps/gemm.h"
|
||||
#include "mlx/backend/metal/utils.h"
|
||||
|
||||
|
@@ -1,5 +1,4 @@
|
||||
// Copyright © 2023-2024 Apple Inc.
|
||||
|
||||
#include <cstdlib>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
@@ -16,9 +15,6 @@ bool is_available() {
|
||||
}
|
||||
|
||||
int max_ops_per_buffer() {
|
||||
#ifdef MLX_METAL_DEBUG
|
||||
return 1;
|
||||
#else
|
||||
auto get_val = []() {
|
||||
if (const char* buff_str = std::getenv("MLX_MAX_OPS_PER_BUFFER")) {
|
||||
return atoi(buff_str);
|
||||
@@ -28,7 +24,6 @@ int max_ops_per_buffer() {
|
||||
};
|
||||
static int max_ops_per_buffer_ = get_val();
|
||||
return max_ops_per_buffer_;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define MAX_OPS_PER_BUFFER max_ops_per_buffer()
|
||||
|
@@ -2,15 +2,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "mlx/array.h"
|
||||
#include "mlx/stream.h"
|
||||
|
||||
namespace mlx::core::metal {
|
||||
|
||||
/* Check if the Metal backend is available. */
|
||||
bool is_available();
|
||||
|
||||
/* Get the actively used memory in bytes.
|
||||
@@ -58,14 +54,6 @@ size_t set_memory_limit(size_t limit, bool relaxed = true);
|
||||
* */
|
||||
size_t set_cache_limit(size_t limit);
|
||||
|
||||
void new_stream(Stream stream);
|
||||
std::shared_ptr<void> new_scoped_memory_pool();
|
||||
|
||||
std::function<void()> make_task(
|
||||
array& arr,
|
||||
std::vector<std::shared_future<void>> deps,
|
||||
std::shared_ptr<std::promise<void>> p);
|
||||
|
||||
/** Capture a GPU trace, saving it to an absolute file `path` */
|
||||
bool start_capture(std::string path = "");
|
||||
void stop_capture();
|
||||
|
22
mlx/backend/metal/metal_impl.h
Normal file
22
mlx/backend/metal/metal_impl.h
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright © 2023-2024 Apple Inc.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "mlx/array.h"
|
||||
#include "mlx/stream.h"
|
||||
|
||||
// Declarations of Metal-backend entry points kept in this separate
// implementation header (metal_impl.h).
// NOTE(review): std::function is used below, but the includes visible in this
// header are <future>, <memory>, <vector>, "mlx/array.h" and "mlx/stream.h";
// <functional> presumably arrives transitively -- confirm, or include it
// directly.
namespace mlx::core::metal {
|
||||
|
||||
// Takes a Stream by value and returns nothing. Presumably performs the
// Metal-side setup for that stream -- TODO confirm against the definition.
void new_stream(Stream stream);
|
||||
// Returns a type-erased std::shared_ptr<void> handle. The name suggests the
// handle's lifetime scopes a memory pool -- confirm against the definition.
std::shared_ptr<void> new_scoped_memory_pool();
|
||||
|
||||
// Builds and returns a nullary callable (std::function<void()>).
//   arr  - array passed by non-const reference.
//   deps - vector of shared futures, passed by value.
//   p    - shared pointer to a std::promise<void>.
// Presumably the callable waits on `deps` and fulfils `p` once the work for
// `arr` is done -- the definition is not visible here; verify before relying
// on this.
std::function<void()> make_task(
|
||||
array& arr,
|
||||
std::vector<std::shared_future<void>> deps,
|
||||
std::shared_ptr<std::promise<void>> p);
|
||||
|
||||
} // namespace mlx::core::metal
|
@@ -142,6 +142,9 @@ inline void debug_set_primitive_buffer_label(
|
||||
Primitive& primitive) {
|
||||
#ifdef MLX_METAL_DEBUG
|
||||
std::ostringstream label;
|
||||
if (auto cbuf_label = command_buffer->label(); cbuf_label) {
|
||||
label << cbuf_label->utf8String();
|
||||
}
|
||||
primitive.print(label);
|
||||
command_buffer->setLabel(make_string(label));
|
||||
#endif
|
||||
|
@@ -3,6 +3,7 @@
|
||||
#include <stdexcept>
|
||||
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
|
||||
namespace mlx::core::metal {
|
||||
|
||||
|
@@ -9,6 +9,7 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
#include "mlx/device.h"
|
||||
#include "mlx/stream.h"
|
||||
|
||||
|
@@ -7,7 +7,7 @@
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "mlx/backend/metal/metal.h"
|
||||
#include "mlx/backend/metal/metal_impl.h"
|
||||
#include "mlx/ops.h"
|
||||
#include "mlx/primitives.h"
|
||||
#include "mlx/scheduler.h"
|
||||
|
Reference in New Issue
Block a user