Throw for certain cases of non-captured inputs in compile (#1401)

This commit is contained in:
Awni Hannun 2024-09-09 14:54:31 -07:00 committed by GitHub
parent dc627dcb5e
commit 3ae6aabe9f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 70 additions and 18 deletions

View File

@ -306,21 +306,27 @@ std::pair<std::vector<array>, std::vector<array>> compile_trace(
// Traverses the graph to build a tape and a map of array ids to their parents // Traverses the graph to build a tape and a map of array ids to their parents
std::pair<std::vector<array>, ParentsMap> compile_dfs( std::pair<std::vector<array>, ParentsMap> compile_dfs(
const std::vector<array>& inputs, const std::vector<array>& inputs,
const std::vector<array>& outputs) { const std::vector<array>& outputs,
const std::vector<array>& original_inputs) {
std::function<void(const array&)> recurse; std::function<void(const array&)> recurse;
std::vector<array> tape; std::vector<array> tape;
std::unordered_set<std::uintptr_t> input_set; std::unordered_set<std::uintptr_t> input_set;
std::unordered_set<std::uintptr_t> original_input_set;
std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>> std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
parents_map; parents_map;
for (int i = 0; i < inputs.size(); ++i) { for (int i = 0; i < inputs.size(); ++i) {
auto in = inputs[i]; input_set.insert(inputs[i].id());
input_set.insert(in.id()); original_input_set.insert(original_inputs[i].id());
} }
// DFS the graph to build the tape, and log parents and scalars // DFS the graph to build the tape, and log parents and scalars
std::unordered_set<std::uintptr_t> cache; std::unordered_set<std::uintptr_t> cache;
recurse = [&](const array& a) { recurse = [&](const array& a) {
auto id = a.id(); auto id = a.id();
if (original_input_set.find(id) != original_input_set.end()) {
throw std::invalid_argument(
"[compile] Attempting to compile a function with uncaptured inputs is not allowed.");
}
if (cache.find(id) != cache.end()) { if (cache.find(id) != cache.end()) {
return; return;
} }
@ -833,7 +839,7 @@ std::function<std::vector<array>(const std::vector<array>&)> compile(
std::unordered_map<uintptr_t, std::vector<std::pair<array, int>>> std::unordered_map<uintptr_t, std::vector<std::pair<array, int>>>
parents_map; parents_map;
std::tie(entry.tape, parents_map) = std::tie(entry.tape, parents_map) =
compile_dfs(entry.inputs, entry.outputs); compile_dfs(entry.inputs, entry.outputs, inputs);
// Simplify the tape // Simplify the tape
if (compile_mode() != CompileMode::no_simplify) { if (compile_mode() != CompileMode::no_simplify) {

View File

@ -972,7 +972,7 @@ void write_signature(
{"threadgroups_per_grid", "uint3"}, {"threadgroups_per_grid", "uint3"},
{"threads_per_grid", "uint3"}, {"threads_per_grid", "uint3"},
{"threads_per_simdgroup", "uint"}, {"threads_per_simdgroup", "uint"},
{"thread_per_threadgroup", "uint3"}, {"threads_per_threadgroup", "uint3"},
}; };
std::vector<std::pair<std::string, std::string>> attrs; std::vector<std::pair<std::string, std::string>> attrs;
for (const auto& [attr, dtype] : metal_attributes) { for (const auto& [attr, dtype] : metal_attributes) {

View File

@ -302,20 +302,20 @@ void init_fast(nb::module_& parent_module) {
A jit-compiled custom Metal kernel defined from a source string. A jit-compiled custom Metal kernel defined from a source string.
Args: Args:
name (str): Name for the kernel. name (str): Name for the kernel.
input_names (List[str]): The parameter names of the inputs in the input_names (List[str]): The parameter names of the inputs in the
function signature.
output_names (List[str]): The parameter names of the outputs in the
function signature. function signature.
source (str): Source code. This is the body of a function in Metal, output_names (List[str]): The parameter names of the outputs in the
the function signature will be automatically generated. function signature.
header (str): Header source code to include before the main function. source (str): Source code. This is the body of a function in Metal,
Useful for helper functions or includes that should live outside of the function signature will be automatically generated.
the main function body. header (str): Header source code to include before the main function.
ensure_row_contiguous (bool): Whether to ensure the inputs are row contiguous Useful for helper functions or includes that should live outside of
before the kernel runs. Default: ``True``. the main function body.
atomic_outputs (bool): Whether to use atomic outputs in the function signature ensure_row_contiguous (bool): Whether to ensure the inputs are row contiguous
e.g. ``device atomic<float>``. Default: ``False``. before the kernel runs. Default: ``True``.
atomic_outputs (bool): Whether to use atomic outputs in the function signature
e.g. ``device atomic<float>``. Default: ``False``.
Returns: Returns:
Callable ``metal_kernel``. Callable ``metal_kernel``.

View File

@ -733,6 +733,31 @@ class TestCompile(mlx_tests.MLXTestCase):
expected = fn(x) expected = fn(x)
self.assertTrue(mx.array_equal(expected, out)) self.assertTrue(mx.array_equal(expected, out))
def test_compile_without_captured_inputs(self):
    # Checks that mx.compile raises ValueError when the function being
    # compiled reaches an array through an enclosing scope while that same
    # array is also handed to the compiled function as its input.
    x = mx.array([1, 2, 3]) + 2

    def fn(a):
        # `x` is not a parameter of this function; it is read from the
        # enclosing test scope.
        y = x + 1
        return a + y

    # The outer `x` is passed as the argument, so it is both the explicit
    # input and the array `fn` reads through its closure.
    with self.assertRaises(ValueError):
        y = mx.compile(fn)(x)

    # Second scenario: the function keeps state in the enclosing scope
    # between calls via `nonlocal`.
    x = mx.array([1.0, 2.0]) + mx.array([1.0, 2.0])
    y = None

    def fn(x):
        nonlocal y
        if y is None:
            y = mx.array([1.0, 2.0])
        y = y + x
        return y

    # Call once without compiling so that `y` is no longer None and has
    # already been combined with `x` before the compiled call below.
    fn(x)
    with self.assertRaises(ValueError):
        y = mx.compile(fn)(x)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -689,6 +689,27 @@ class TestFast(mlx_tests.MLXTestCase):
) )
self.assertTrue(mx.allclose(out[0], mx.exp(a))) self.assertTrue(mx.allclose(out[0], mx.exp(a)))
@unittest.skipIf(not mx.metal.is_available(), "Metal is not available")
def test_custom_kernel_attributes(self):
    # Checks that a custom Metal kernel body can reference the
    # `threads_per_threadgroup` attribute and reads back the expected value.
    a = mx.zeros(shape=(1, 1))
    kernel = mx.fast.metal_kernel(
        name="test_fun",
        input_names=["a"],
        output_names=["out"],
        # The kernel body writes the x component of the attribute into the
        # single output element.
        source="""
out[0] = threads_per_threadgroup.x;
""",
    )
    # Launched with threadgroup=(2, 1, 1); the assertion below expects the
    # value written by the kernel to be 2.
    out = kernel(
        inputs=[a],
        grid=(2, 1, 1),
        threadgroup=(2, 1, 1),
        output_shapes=[(1, 1)],
        output_dtypes=[mx.uint32],
        stream=mx.gpu,
    )[0]
    self.assertEqual(out.item(), 2)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()