Compile with capture (#629)

* Simple kernel generation

* Remove the generate kernel from graph_utils

* fix multi-output with compile

* fuse with stopgrad

* v1 input, output capture in compile

* cleanup tree update with visitor update

* nit

* remove todo

* state for model, optional explicit init and more pure optimizer steps

* move learning rate to state

* add lr to opt state, some fixes in capture

* fix optim

* update tuple of containers as well

* fix stream for compiled output

* rng state for compile

* nit

* updates and comments

---------

Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
This commit is contained in:
Awni Hannun
2024-02-07 17:29:22 -08:00
committed by GitHub
parent e5e816a5ef
commit 1b97b2958b
13 changed files with 723 additions and 157 deletions

View File

@@ -191,10 +191,7 @@ struct CompilerCache {
auto is_match = [](const std::vector<array>& in1,
const std::vector<array>& in2) {
if (in1.size() != in2.size()) {
-std::ostringstream msg;
-msg << "[compiler] Unexpected number of inputs to compiled function:"
-<< " expected " << in2.size() << " got " << in1.size() << ".";
-throw std::invalid_argument(msg.str());
+return false;
}
for (int i = 0; i < in1.size(); ++i) {
if (in1[i].shape() != in2[i].shape()) {
@@ -603,7 +600,7 @@ void compile_fuse(
shapes,
types,
std::make_shared<Compiled>(
-outputs.back().primitive().stream(),
+old_outputs.back().primitive().stream(),
inputs,
old_outputs,
std::move(fused_tape),