mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-24 17:31:16 +08:00
Even Faster I/O (#1369)
* try multithreading for faster IO * smaller batch size * Account for pread returning less than size * nit --------- Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
This commit is contained in:
parent
4e22a1dffe
commit
fcb65a3897
@ -33,8 +33,7 @@ void Load::eval(const std::vector<array>& inputs, array& out) {
|
||||
assert(inputs.size() == 0);
|
||||
out.set_data(allocator::malloc_or_wait(out.nbytes()));
|
||||
|
||||
reader_->seek(offset_);
|
||||
reader_->read(out.data<char>(), out.nbytes());
|
||||
reader_->read(out.data<char>(), out.nbytes(), offset_);
|
||||
|
||||
if (swap_endianness_) {
|
||||
switch (out.itemsize()) {
|
||||
|
@ -298,7 +298,51 @@ array load(std::shared_ptr<io::Reader> in_stream, StreamOrDevice s) {
|
||||
|
||||
/** Load array from file in .npy format */
|
||||
array load(std::string file, StreamOrDevice s) {
|
||||
return load(std::make_shared<io::FileReader>(std::move(file)), s);
|
||||
return load(std::make_shared<io::ParallelFileReader>(std::move(file), 4), s);
|
||||
}
|
||||
|
||||
namespace io {
|
||||
|
||||
void ParallelFileReader::read(char* data, size_t n) {
|
||||
while (n != 0) {
|
||||
auto m = ::read(fd_, data, std::min(n, static_cast<size_t>(INT32_MAX)));
|
||||
if (m <= 0) {
|
||||
std::ostringstream msg;
|
||||
msg << "[read] Unable to read " << n << " bytes from file.";
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
data += m;
|
||||
n -= m;
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelFileReader::read(char* data, size_t n, size_t offset) {
|
||||
auto readfn = [fd = fd_](size_t offset, size_t size, char* buffer) -> bool {
|
||||
while (size != 0) {
|
||||
auto m = pread(fd, buffer, size, offset);
|
||||
if (m <= 0) {
|
||||
return false;
|
||||
}
|
||||
buffer += m;
|
||||
size -= m;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
std::vector<std::future<bool>> futs;
|
||||
while (n != 0) {
|
||||
size_t m = std::min(batch_size_, n);
|
||||
futs.emplace_back(thread_pool_.enqueue(readfn, offset, m, data));
|
||||
data += m;
|
||||
n -= m;
|
||||
offset += m;
|
||||
}
|
||||
for (auto& f : futs) {
|
||||
if (!f.get()) {
|
||||
throw std::runtime_error("[read] Unable to read from file.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace io
|
||||
|
||||
} // namespace mlx::core
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include "mlx/io/threadpool.h"
|
||||
|
||||
namespace mlx::core {
|
||||
|
||||
namespace io {
|
||||
@ -21,6 +23,7 @@ class Reader {
|
||||
int64_t off,
|
||||
std::ios_base::seekdir way = std::ios_base::beg) = 0;
|
||||
virtual void read(char* data, size_t n) = 0;
|
||||
virtual void read(char* data, size_t n, size_t offset) = 0;
|
||||
virtual std::string label() const = 0;
|
||||
virtual ~Reader() = default;
|
||||
};
|
||||
@ -38,12 +41,14 @@ class Writer {
|
||||
virtual ~Writer() = default;
|
||||
};
|
||||
|
||||
class FileReader : public Reader {
|
||||
class ParallelFileReader : public Reader {
|
||||
public:
|
||||
explicit FileReader(std::string file_path)
|
||||
: fd_(open(file_path.c_str(), O_RDONLY)), label_(std::move(file_path)) {}
|
||||
explicit ParallelFileReader(std::string file_path, int num_threads)
|
||||
: fd_(open(file_path.c_str(), O_RDONLY)),
|
||||
label_(std::move(file_path)),
|
||||
thread_pool_(ThreadPool(num_threads)) {}
|
||||
|
||||
~FileReader() override {
|
||||
~ParallelFileReader() override {
|
||||
close(fd_);
|
||||
}
|
||||
|
||||
@ -59,35 +64,26 @@ class FileReader : public Reader {
|
||||
return lseek(fd_, 0, SEEK_CUR);
|
||||
}
|
||||
|
||||
void seek(int64_t off, std::ios_base::seekdir way = std::ios_base::beg)
|
||||
override {
|
||||
if (way == std::ios_base::beg) {
|
||||
lseek(fd_, off, 0);
|
||||
} else {
|
||||
lseek(fd_, off, SEEK_CUR);
|
||||
}
|
||||
void seek(int64_t, std::ios_base::seekdir = std::ios_base::beg) override {
|
||||
throw std::runtime_error("[ParallelFileReader::seek] Not allowed");
|
||||
}
|
||||
|
||||
void read(char* data, size_t n) override {
|
||||
while (n != 0) {
|
||||
auto m = ::read(fd_, data, std::min(n, static_cast<size_t>(INT32_MAX)));
|
||||
if (m <= 0) {
|
||||
std::ostringstream msg;
|
||||
msg << "[read] Unable to read " << n << " bytes from file.";
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
data += m;
|
||||
n -= m;
|
||||
}
|
||||
}
|
||||
// Warning: do not use this function from multiple threads as
|
||||
// it advances the file descriptor's offset
|
||||
void read(char* data, size_t n) override;
|
||||
|
||||
void read(char* data, size_t n, size_t offset) override;
|
||||
|
||||
std::string label() const override {
|
||||
return "file " + label_;
|
||||
}
|
||||
|
||||
private:
|
||||
// 4MB
|
||||
static constexpr size_t batch_size_ = (1 << 22);
|
||||
int fd_;
|
||||
std::string label_;
|
||||
ThreadPool thread_pool_;
|
||||
};
|
||||
|
||||
class FileWriter : public Writer {
|
||||
|
@ -147,7 +147,7 @@ SafetensorsLoad load_safetensors(
|
||||
}
|
||||
|
||||
SafetensorsLoad load_safetensors(const std::string& file, StreamOrDevice s) {
|
||||
return load_safetensors(std::make_shared<io::FileReader>(file), s);
|
||||
return load_safetensors(std::make_shared<io::ParallelFileReader>(file, 4), s);
|
||||
}
|
||||
|
||||
void save_safetensors(
|
||||
|
86
mlx/io/threadpool.h
Normal file
86
mlx/io/threadpool.h
Normal file
@ -0,0 +1,86 @@
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
/**
 * A fixed-size pool of worker threads that run queued tasks in FIFO order.
 *
 * Tasks are submitted with enqueue(), which returns a std::future for the
 * task's result. The destructor lets the workers finish every task that is
 * still queued and then joins them.
 */
class ThreadPool {
 public:
  // Start `threads` workers. If a worker cannot be started, the workers that
  // are already running are stopped and joined before the exception
  // propagates.
  ThreadPool(size_t threads);

  // The workers capture `this`, so the pool must stay at a fixed address.
  ThreadPool(const ThreadPool&) = delete;
  ThreadPool& operator=(const ThreadPool&) = delete;

  // Queue `f(args...)` for execution and return a future for its result.
  // Throws std::runtime_error if the pool has already been stopped.
  template <class F, class... Args>
  auto enqueue(F&& f, Args&&... args)
      -> std::future<std::invoke_result_t<F, Args...>>;

  // Stop accepting work, run what is still queued, and join the workers.
  ~ThreadPool();

 private:
  // Set the stop flag, wake every worker, and join all joinable threads.
  void shutdown();

  // need to keep track of threads so we can join them
  std::vector<std::thread> workers;
  // the task queue
  std::queue<std::function<void()>> tasks;

  // synchronization
  std::mutex queue_mutex;
  std::condition_variable condition;
  bool stop;
};

inline ThreadPool::ThreadPool(size_t threads) : stop(false) {
  try {
    for (size_t i = 0; i < threads; ++i) {
      workers.emplace_back([this] {
        for (;;) {
          std::function<void()> task;

          {
            std::unique_lock<std::mutex> lock(this->queue_mutex);
            this->condition.wait(
                lock, [this] { return this->stop || !this->tasks.empty(); });
            // Only exit once the queue is drained, so pending work still runs
            if (this->stop && this->tasks.empty()) {
              return;
            }
            task = std::move(this->tasks.front());
            this->tasks.pop();
          }

          // Run the task outside the lock so other workers can dequeue
          task();
        }
      });
    }
  } catch (...) {
    // Destroying a joinable std::thread calls std::terminate, so the workers
    // that did start must be joined before the members are torn down.
    shutdown();
    throw;
  }
}

template <class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
    -> std::future<std::invoke_result_t<F, Args...>> {
  using return_type = std::invoke_result_t<F, Args...>;

  // std::function requires a copyable callable while std::packaged_task is
  // move-only, hence the shared_ptr wrapper.
  auto task = std::make_shared<std::packaged_task<return_type()>>(
      std::bind(std::forward<F>(f), std::forward<Args>(args)...));

  std::future<return_type> res = task->get_future();
  {
    std::unique_lock<std::mutex> lock(queue_mutex);

    // don't allow enqueueing after stopping the pool
    if (stop) {
      throw std::runtime_error(
          "[ThreadPool::enqueue] Not allowed on stopped ThreadPool");
    }

    tasks.emplace([task]() { (*task)(); });
  }
  condition.notify_one();
  return res;
}

inline void ThreadPool::shutdown() {
  {
    std::unique_lock<std::mutex> lock(queue_mutex);
    stop = true;
  }
  condition.notify_all();
  for (std::thread& worker : workers) {
    if (worker.joinable()) {
      worker.join();
    }
  }
}

inline ThreadPool::~ThreadPool() {
  shutdown();
}
|
@ -146,6 +146,11 @@ class PyFileReader : public io::Reader {
|
||||
}
|
||||
}
|
||||
|
||||
void read(char* data, size_t n, size_t offset) override {
|
||||
seek(offset);
|
||||
read(data, n);
|
||||
}
|
||||
|
||||
std::string label() const override {
|
||||
return "python file object";
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user