mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-25 01:41:17 +08:00
Even Faster I/O (#1369)
* try multithreading for faster IO * smaller batch size * Account for pread returning less than size * nit --------- Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
This commit is contained in:
parent
4e22a1dffe
commit
fcb65a3897
@ -33,8 +33,7 @@ void Load::eval(const std::vector<array>& inputs, array& out) {
|
|||||||
assert(inputs.size() == 0);
|
assert(inputs.size() == 0);
|
||||||
out.set_data(allocator::malloc_or_wait(out.nbytes()));
|
out.set_data(allocator::malloc_or_wait(out.nbytes()));
|
||||||
|
|
||||||
reader_->seek(offset_);
|
reader_->read(out.data<char>(), out.nbytes(), offset_);
|
||||||
reader_->read(out.data<char>(), out.nbytes());
|
|
||||||
|
|
||||||
if (swap_endianness_) {
|
if (swap_endianness_) {
|
||||||
switch (out.itemsize()) {
|
switch (out.itemsize()) {
|
||||||
|
@ -298,7 +298,51 @@ array load(std::shared_ptr<io::Reader> in_stream, StreamOrDevice s) {
|
|||||||
|
|
||||||
/** Load array from file in .npy format */
|
/** Load array from file in .npy format */
|
||||||
array load(std::string file, StreamOrDevice s) {
|
array load(std::string file, StreamOrDevice s) {
|
||||||
return load(std::make_shared<io::FileReader>(std::move(file)), s);
|
return load(std::make_shared<io::ParallelFileReader>(std::move(file), 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace io {
|
||||||
|
|
||||||
|
void ParallelFileReader::read(char* data, size_t n) {
|
||||||
|
while (n != 0) {
|
||||||
|
auto m = ::read(fd_, data, std::min(n, static_cast<size_t>(INT32_MAX)));
|
||||||
|
if (m <= 0) {
|
||||||
|
std::ostringstream msg;
|
||||||
|
msg << "[read] Unable to read " << n << " bytes from file.";
|
||||||
|
throw std::runtime_error(msg.str());
|
||||||
|
}
|
||||||
|
data += m;
|
||||||
|
n -= m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ParallelFileReader::read(char* data, size_t n, size_t offset) {
|
||||||
|
auto readfn = [fd = fd_](size_t offset, size_t size, char* buffer) -> bool {
|
||||||
|
while (size != 0) {
|
||||||
|
auto m = pread(fd, buffer, size, offset);
|
||||||
|
if (m <= 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
buffer += m;
|
||||||
|
size -= m;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
std::vector<std::future<bool>> futs;
|
||||||
|
while (n != 0) {
|
||||||
|
size_t m = std::min(batch_size_, n);
|
||||||
|
futs.emplace_back(thread_pool_.enqueue(readfn, offset, m, data));
|
||||||
|
data += m;
|
||||||
|
n -= m;
|
||||||
|
offset += m;
|
||||||
|
}
|
||||||
|
for (auto& f : futs) {
|
||||||
|
if (!f.get()) {
|
||||||
|
throw std::runtime_error("[read] Unable to read from file.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace io
|
||||||
|
|
||||||
} // namespace mlx::core
|
} // namespace mlx::core
|
||||||
|
@ -8,6 +8,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
|
#include "mlx/io/threadpool.h"
|
||||||
|
|
||||||
namespace mlx::core {
|
namespace mlx::core {
|
||||||
|
|
||||||
namespace io {
|
namespace io {
|
||||||
@ -21,6 +23,7 @@ class Reader {
|
|||||||
int64_t off,
|
int64_t off,
|
||||||
std::ios_base::seekdir way = std::ios_base::beg) = 0;
|
std::ios_base::seekdir way = std::ios_base::beg) = 0;
|
||||||
virtual void read(char* data, size_t n) = 0;
|
virtual void read(char* data, size_t n) = 0;
|
||||||
|
virtual void read(char* data, size_t n, size_t offset) = 0;
|
||||||
virtual std::string label() const = 0;
|
virtual std::string label() const = 0;
|
||||||
virtual ~Reader() = default;
|
virtual ~Reader() = default;
|
||||||
};
|
};
|
||||||
@ -38,12 +41,14 @@ class Writer {
|
|||||||
virtual ~Writer() = default;
|
virtual ~Writer() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
class FileReader : public Reader {
|
class ParallelFileReader : public Reader {
|
||||||
public:
|
public:
|
||||||
explicit FileReader(std::string file_path)
|
explicit ParallelFileReader(std::string file_path, int num_threads)
|
||||||
: fd_(open(file_path.c_str(), O_RDONLY)), label_(std::move(file_path)) {}
|
: fd_(open(file_path.c_str(), O_RDONLY)),
|
||||||
|
label_(std::move(file_path)),
|
||||||
|
thread_pool_(ThreadPool(num_threads)) {}
|
||||||
|
|
||||||
~FileReader() override {
|
~ParallelFileReader() override {
|
||||||
close(fd_);
|
close(fd_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,35 +64,26 @@ class FileReader : public Reader {
|
|||||||
return lseek(fd_, 0, SEEK_CUR);
|
return lseek(fd_, 0, SEEK_CUR);
|
||||||
}
|
}
|
||||||
|
|
||||||
void seek(int64_t off, std::ios_base::seekdir way = std::ios_base::beg)
|
void seek(int64_t, std::ios_base::seekdir = std::ios_base::beg) override {
|
||||||
override {
|
throw std::runtime_error("[ParallelFileReader::seek] Not allowed");
|
||||||
if (way == std::ios_base::beg) {
|
|
||||||
lseek(fd_, off, 0);
|
|
||||||
} else {
|
|
||||||
lseek(fd_, off, SEEK_CUR);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void read(char* data, size_t n) override {
|
// Warning: do not use this function from multiple threads as
|
||||||
while (n != 0) {
|
// it advances the file descriptor
|
||||||
auto m = ::read(fd_, data, std::min(n, static_cast<size_t>(INT32_MAX)));
|
void read(char* data, size_t n) override;
|
||||||
if (m <= 0) {
|
|
||||||
std::ostringstream msg;
|
void read(char* data, size_t n, size_t offset) override;
|
||||||
msg << "[read] Unable to read " << n << " bytes from file.";
|
|
||||||
throw std::runtime_error(msg.str());
|
|
||||||
}
|
|
||||||
data += m;
|
|
||||||
n -= m;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string label() const override {
|
std::string label() const override {
|
||||||
return "file " + label_;
|
return "file " + label_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// 4MB
|
||||||
|
static constexpr size_t batch_size_ = (1 << 22);
|
||||||
int fd_;
|
int fd_;
|
||||||
std::string label_;
|
std::string label_;
|
||||||
|
ThreadPool thread_pool_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class FileWriter : public Writer {
|
class FileWriter : public Writer {
|
||||||
|
@ -147,7 +147,7 @@ SafetensorsLoad load_safetensors(
|
|||||||
}
|
}
|
||||||
|
|
||||||
SafetensorsLoad load_safetensors(const std::string& file, StreamOrDevice s) {
|
SafetensorsLoad load_safetensors(const std::string& file, StreamOrDevice s) {
|
||||||
return load_safetensors(std::make_shared<io::FileReader>(file), s);
|
return load_safetensors(std::make_shared<io::ParallelFileReader>(file, 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void save_safetensors(
|
void save_safetensors(
|
||||||
|
86
mlx/io/threadpool.h
Normal file
86
mlx/io/threadpool.h
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <functional>
|
||||||
|
#include <future>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <queue>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/**
 * A fixed-size pool of worker threads.
 *
 * Callables handed to enqueue() are placed on a queue and picked up by the
 * workers in the order they were queued. The destructor lets the workers
 * finish everything that is still queued and then joins them.
 */
class ThreadPool {
 public:
  /**
   * Start the pool with `threads` workers. A request for zero workers is
   * treated as a request for one, because a pool without workers would never
   * run any task and every future returned by enqueue() would block forever.
   */
  ThreadPool(size_t);

  /**
   * Queue `f(args...)` for execution on a worker thread.
   *
   * The callable and its arguments are bound by std::bind, i.e. they are
   * copied/moved into the task. Returns a future for the call's result; an
   * exception thrown by the call is delivered through that future.
   *
   * Throws std::runtime_error if the pool is already shutting down.
   */
  template <class F, class... Args>
  auto enqueue(F&& f, Args&&... args)
      -> std::future<std::invoke_result_t<F, Args...>>;

  /** Runs the remaining queued tasks, then joins all workers. */
  ~ThreadPool();

 private:
  // need to keep track of threads so we can join them
  std::vector<std::thread> workers;
  // the task queue
  std::queue<std::function<void()>> tasks;

  // synchronization
  std::mutex queue_mutex;
  std::condition_variable condition;
  bool stop;
};

inline ThreadPool::ThreadPool(size_t threads) : stop(false) {
  const size_t count = threads == 0 ? 1 : threads;
  workers.reserve(count);
  try {
    for (size_t i = 0; i < count; ++i) {
      workers.emplace_back([this] {
        for (;;) {
          std::function<void()> task;

          {
            std::unique_lock<std::mutex> lock(this->queue_mutex);
            this->condition.wait(
                lock, [this] { return this->stop || !this->tasks.empty(); });
            // Only exit once the queue has been drained, so tasks queued
            // before shutdown still run.
            if (this->stop && this->tasks.empty()) {
              return;
            }
            task = std::move(this->tasks.front());
            this->tasks.pop();
          }

          // Run the task outside the lock so other workers can dequeue.
          task();
        }
      });
    }
  } catch (...) {
    // Creating a thread failed part-way through. The workers that did start
    // are joinable, and destroying a joinable std::thread terminates the
    // program, so stop and join them before letting the error propagate.
    {
      std::unique_lock<std::mutex> lock(queue_mutex);
      stop = true;
    }
    condition.notify_all();
    for (std::thread& worker : workers) {
      worker.join();
    }
    throw;
  }
}

template <class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
    -> std::future<std::invoke_result_t<F, Args...>> {
  using return_type = std::invoke_result_t<F, Args...>;

  // std::function requires a copyable callable while std::packaged_task is
  // move-only, hence the shared_ptr indirection.
  auto task = std::make_shared<std::packaged_task<return_type()>>(
      std::bind(std::forward<F>(f), std::forward<Args>(args)...));

  std::future<return_type> res = task->get_future();
  {
    std::unique_lock<std::mutex> lock(queue_mutex);

    // don't allow enqueueing after stopping the pool
    if (stop) {
      throw std::runtime_error(
          "[ThreadPool::enqueue] Not allowed on stopped ThreadPool");
    }

    tasks.emplace([task]() { (*task)(); });
  }
  condition.notify_one();
  return res;
}

inline ThreadPool::~ThreadPool() {
  {
    std::unique_lock<std::mutex> lock(queue_mutex);
    stop = true;
  }
  // Wake every worker so each one can observe `stop` and exit.
  condition.notify_all();
  for (std::thread& worker : workers) {
    worker.join();
  }
}
|
@ -146,6 +146,11 @@ class PyFileReader : public io::Reader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void read(char* data, size_t n, size_t offset) override {
|
||||||
|
seek(offset);
|
||||||
|
read(data, n);
|
||||||
|
}
|
||||||
|
|
||||||
std::string label() const override {
|
std::string label() const override {
|
||||||
return "python file object";
|
return "python file object";
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user