Mirror of https://github.com/ml-explore/mlx.git
Even Even Faster IO (#1374)
* even more faster io
* make reader pool static
* make python reader thread safe
* one more optimization
@@ -202,15 +202,18 @@ void Load::eval_gpu(const std::vector<array>& inputs, array& out) {
   static Stream io_stream = new_stream(Device::cpu);
   out.set_data(allocator::malloc_or_wait(out.nbytes()));
 
-  auto task = [out = out,
-               offset = offset_,
-               reader = reader_,
-               swap_endianness = swap_endianness_]() mutable {
+  auto read_task = [out = out,
+                    offset = offset_,
+                    reader = reader_,
+                    swap_endianness = swap_endianness_]() mutable {
     load(out, offset, reader, swap_endianness);
-    out.event().signal();
   };
+  auto fut = io::thread_pool().enqueue(std::move(read_task)).share();
+  auto signal_task = [out = out, fut = std::move(fut)]() {
+    fut.wait();
+    out.event().signal();
+  };
 
-  scheduler::enqueue(io_stream, std::move(task));
+  scheduler::enqueue(io_stream, std::move(signal_task));
   auto& d = metal::device(stream().device);
   d.end_encoding(stream().index);
   auto command_buffer = d.get_command_buffer(stream().index);
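For readers following the change, the sketch below mirrors the pattern the diff introduces: the blocking read runs on a dedicated reader thread pool, while the task placed on the serial CPU IO stream only waits on the read's future and then signals completion, so the stream itself is never tied up by the read. This is a simplified stand-in for illustration only: the ThreadPool class, the sleep, and the printed messages are not mlx's io::thread_pool(), load(), scheduler, or event types.

// Minimal sketch of the read-then-signal pattern from the diff above.
// ThreadPool is a toy stand-in for io::thread_pool(); the direct call to
// signal_task() stands in for scheduler::enqueue(io_stream, ...).
#include <chrono>
#include <condition_variable>
#include <functional>
#include <future>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

class ThreadPool {
 public:
  explicit ThreadPool(size_t n) {
    for (size_t i = 0; i < n; ++i) {
      workers_.emplace_back([this] {
        for (;;) {
          std::function<void()> job;
          {
            std::unique_lock<std::mutex> lk(m_);
            cv_.wait(lk, [&] { return stop_ || !jobs_.empty(); });
            if (stop_ && jobs_.empty()) {
              return;
            }
            job = std::move(jobs_.front());
            jobs_.pop();
          }
          job();  // run the blocking read off the scheduler stream
        }
      });
    }
  }
  ~ThreadPool() {
    {
      std::lock_guard<std::mutex> lk(m_);
      stop_ = true;
    }
    cv_.notify_all();
    for (auto& w : workers_) {
      w.join();
    }
  }
  std::future<void> enqueue(std::function<void()> f) {
    auto task = std::make_shared<std::packaged_task<void()>>(std::move(f));
    auto fut = task->get_future();
    {
      std::lock_guard<std::mutex> lk(m_);
      jobs_.emplace([task] { (*task)(); });
    }
    cv_.notify_one();
    return fut;
  }

 private:
  std::vector<std::thread> workers_;
  std::queue<std::function<void()>> jobs_;
  std::mutex m_;
  std::condition_variable cv_;
  bool stop_ = false;
};

int main() {
  // One pool shared by every load ("make reader pool static").
  static ThreadPool reader_pool(4);

  // The read task does the slow work; the sleep stands in for the real read.
  auto read_task = [] {
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    std::cout << "read finished\n";
  };
  auto fut = reader_pool.enqueue(std::move(read_task)).share();

  // The signal task is what the serial IO stream actually runs: it is cheap,
  // so the stream only blocks on the final wait, not on the read itself.
  auto signal_task = [fut = std::move(fut)] {
    fut.wait();
    std::cout << "event signaled\n";  // stands in for out.event().signal()
  };
  signal_task();  // in mlx: scheduler::enqueue(io_stream, std::move(signal_task));
  return 0;
}

Because the pool is created once and outlives any single load (the commit message's "make reader pool static"), reads issued by different Load operations can presumably proceed on separate pool threads at the same time, while each output's event is still signaled in order by the cheap tasks on the IO stream.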