CPU compile (#691)

* build and load shared object for cpu compile

* nits

* cpu compile tests pass

* cpu compile tests pass

* fix preamble for g++

* donation

* fix gpu buffer donation

* reuse prebuilt libraries

* faster contiguity conditions

* fix test

* rid compiler warning

* fast erf

* Fix float16 for compile and add more types to cpu compile

* Remove a forgotten comment

* use cached libs

* nits

---------

Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
This commit is contained in:
Awni Hannun
2024-02-17 06:54:32 -08:00
committed by GitHub
parent c3965fc5ee
commit dc937b8ed3
13 changed files with 1716 additions and 192 deletions

View File

@@ -44,8 +44,8 @@ TEST_CASE("test compile with grad") {
auto y = array(1.0f);
auto grads_expected = grad_fun({x, y});
auto grads_compile = compile(grad_fun)({x, y});
CHECK_EQ(grads_compile[0].item<float>(), grads_expected[0].item<float>());
CHECK_EQ(grads_compile[1].item<float>(), grads_expected[1].item<float>());
CHECK(allclose(grads_compile[0], grads_expected[0]).item<bool>());
CHECK(allclose(grads_compile[1], grads_expected[1]).item<bool>());
}
TEST_CASE("test compile inputs with primitive") {
@@ -272,7 +272,7 @@ TEST_CASE("test compile unary fused") {
CHECK_EQ(out.inputs()[0].id(), x.id());
auto expected_out = unary_fused_1({array(2.0)})[0];
CHECK_EQ(out.item<float>(), expected_out.item<float>());
CHECK(allclose(out, expected_out).item<bool>());
}
{