mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
redesign for faster cpu/gpu synch (#1869)
* redesign for faster cpu/gpu synch
* load + more async CPU
* use command encoder API and move more ops to use it
* make fence back-end generic + CPU only fence
* faster build
* fix async eval
* fixes + handle temporaries
* fix / improve cpu conv
* remove unused status, fix siblings
* fix extensions
* fix
* fix no cpu build
* format
* comments
* fix perf regression, remove unnecessary abort
* fix events, task limit cpu
* fix waiting
* fix donation / temporaries in normalization
This commit is contained in:
@@ -1231,11 +1231,7 @@ class Load : public UnaryPrimitive {
|
||||
: UnaryPrimitive(stream),
|
||||
reader_(std::move(reader)),
|
||||
offset_(offset),
|
||||
swap_endianness_(swap_endianness) {
|
||||
if (stream.device == Device::gpu) {
|
||||
io_stream();
|
||||
}
|
||||
}
|
||||
swap_endianness_(swap_endianness) {}
|
||||
|
||||
void eval_cpu(const std::vector<array>& inputs, array& out) override;
|
||||
void eval_gpu(const std::vector<array>& inputs, array& out) override;
|
||||
@@ -1243,10 +1239,6 @@ class Load : public UnaryPrimitive {
|
||||
DEFINE_PRINT(Load)
|
||||
|
||||
private:
|
||||
Stream& io_stream() {
|
||||
static Stream io_stream = new_stream(Device::cpu);
|
||||
return io_stream;
|
||||
};
|
||||
std::shared_ptr<io::Reader> reader_;
|
||||
size_t offset_;
|
||||
bool swap_endianness_;
|
||||
|
||||
Reference in New Issue
Block a user