mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-24 09:21:16 +08:00
throw for certain cases of non captured inputs in compile (#1401)
This commit is contained in:
parent
dc627dcb5e
commit
3ae6aabe9f
@ -306,21 +306,27 @@ std::pair<std::vector<array>, std::vector<array>> compile_trace(
|
|||||||
// Traverses the graph to build a tape and a map of array ids to their parents
|
// Traverses the graph to build a tape and a map of array ids to their parents
|
||||||
std::pair<std::vector<array>, ParentsMap> compile_dfs(
|
std::pair<std::vector<array>, ParentsMap> compile_dfs(
|
||||||
const std::vector<array>& inputs,
|
const std::vector<array>& inputs,
|
||||||
const std::vector<array>& outputs) {
|
const std::vector<array>& outputs,
|
||||||
|
const std::vector<array>& original_inputs) {
|
||||||
std::function<void(const array&)> recurse;
|
std::function<void(const array&)> recurse;
|
||||||
std::vector<array> tape;
|
std::vector<array> tape;
|
||||||
std::unordered_set<std::uintptr_t> input_set;
|
std::unordered_set<std::uintptr_t> input_set;
|
||||||
|
std::unordered_set<std::uintptr_t> original_input_set;
|
||||||
std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
|
std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
|
||||||
parents_map;
|
parents_map;
|
||||||
for (int i = 0; i < inputs.size(); ++i) {
|
for (int i = 0; i < inputs.size(); ++i) {
|
||||||
auto in = inputs[i];
|
input_set.insert(inputs[i].id());
|
||||||
input_set.insert(in.id());
|
original_input_set.insert(original_inputs[i].id());
|
||||||
}
|
}
|
||||||
|
|
||||||
// DFS the graph to build the tape, and log parents and scalars
|
// DFS the graph to build the tape, and log parents and scalars
|
||||||
std::unordered_set<std::uintptr_t> cache;
|
std::unordered_set<std::uintptr_t> cache;
|
||||||
recurse = [&](const array& a) {
|
recurse = [&](const array& a) {
|
||||||
auto id = a.id();
|
auto id = a.id();
|
||||||
|
if (original_input_set.find(id) != original_input_set.end()) {
|
||||||
|
throw std::invalid_argument(
|
||||||
|
"[compile] Attempting to compile a function with uncaptured inputs is not allowed.");
|
||||||
|
}
|
||||||
if (cache.find(id) != cache.end()) {
|
if (cache.find(id) != cache.end()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -833,7 +839,7 @@ std::function<std::vector<array>(const std::vector<array>&)> compile(
|
|||||||
std::unordered_map<uintptr_t, std::vector<std::pair<array, int>>>
|
std::unordered_map<uintptr_t, std::vector<std::pair<array, int>>>
|
||||||
parents_map;
|
parents_map;
|
||||||
std::tie(entry.tape, parents_map) =
|
std::tie(entry.tape, parents_map) =
|
||||||
compile_dfs(entry.inputs, entry.outputs);
|
compile_dfs(entry.inputs, entry.outputs, inputs);
|
||||||
|
|
||||||
// Simplify the tape
|
// Simplify the tape
|
||||||
if (compile_mode() != CompileMode::no_simplify) {
|
if (compile_mode() != CompileMode::no_simplify) {
|
||||||
|
@ -972,7 +972,7 @@ void write_signature(
|
|||||||
{"threadgroups_per_grid", "uint3"},
|
{"threadgroups_per_grid", "uint3"},
|
||||||
{"threads_per_grid", "uint3"},
|
{"threads_per_grid", "uint3"},
|
||||||
{"threads_per_simdgroup", "uint"},
|
{"threads_per_simdgroup", "uint"},
|
||||||
{"thread_per_threadgroup", "uint3"},
|
{"threads_per_threadgroup", "uint3"},
|
||||||
};
|
};
|
||||||
std::vector<std::pair<std::string, std::string>> attrs;
|
std::vector<std::pair<std::string, std::string>> attrs;
|
||||||
for (const auto& [attr, dtype] : metal_attributes) {
|
for (const auto& [attr, dtype] : metal_attributes) {
|
||||||
|
@ -302,20 +302,20 @@ void init_fast(nb::module_& parent_module) {
|
|||||||
A jit-compiled custom Metal kernel defined from a source string.
|
A jit-compiled custom Metal kernel defined from a source string.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
name (str): Name for the kernel.
|
name (str): Name for the kernel.
|
||||||
input_names (List[str]): The parameter names of the inputs in the
|
input_names (List[str]): The parameter names of the inputs in the
|
||||||
function signature.
|
|
||||||
output_names (List[str]): The parameter names of the outputs in the
|
|
||||||
function signature.
|
function signature.
|
||||||
source (str): Source code. This is the body of a function in Metal,
|
output_names (List[str]): The parameter names of the outputs in the
|
||||||
the function signature will be automatically generated.
|
function signature.
|
||||||
header (str): Header source code to include before the main function.
|
source (str): Source code. This is the body of a function in Metal,
|
||||||
Useful for helper functions or includes that should live outside of
|
the function signature will be automatically generated.
|
||||||
the main function body.
|
header (str): Header source code to include before the main function.
|
||||||
ensure_row_contiguous (bool): Whether to ensure the inputs are row contiguous
|
Useful for helper functions or includes that should live outside of
|
||||||
before the kernel runs. Default: ``True``.
|
the main function body.
|
||||||
atomic_outputs (bool): Whether to use atomic outputs in the function signature
|
ensure_row_contiguous (bool): Whether to ensure the inputs are row contiguous
|
||||||
e.g. ``device atomic<float>``. Default: ``False``.
|
before the kernel runs. Default: ``True``.
|
||||||
|
atomic_outputs (bool): Whether to use atomic outputs in the function signature
|
||||||
|
e.g. ``device atomic<float>``. Default: ``False``.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Callable ``metal_kernel``.
|
Callable ``metal_kernel``.
|
||||||
|
@ -733,6 +733,31 @@ class TestCompile(mlx_tests.MLXTestCase):
|
|||||||
expected = fn(x)
|
expected = fn(x)
|
||||||
self.assertTrue(mx.array_equal(expected, out))
|
self.assertTrue(mx.array_equal(expected, out))
|
||||||
|
|
||||||
|
def test_compile_without_captured_inputs(self):
|
||||||
|
x = mx.array([1, 2, 3]) + 2
|
||||||
|
|
||||||
|
def fn(a):
|
||||||
|
y = x + 1
|
||||||
|
return a + y
|
||||||
|
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
y = mx.compile(fn)(x)
|
||||||
|
|
||||||
|
x = mx.array([1.0, 2.0]) + mx.array([1.0, 2.0])
|
||||||
|
y = None
|
||||||
|
|
||||||
|
def fn(x):
|
||||||
|
nonlocal y
|
||||||
|
if y is None:
|
||||||
|
y = mx.array([1.0, 2.0])
|
||||||
|
|
||||||
|
y = y + x
|
||||||
|
return y
|
||||||
|
|
||||||
|
fn(x)
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
y = mx.compile(fn)(x)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -689,6 +689,27 @@ class TestFast(mlx_tests.MLXTestCase):
|
|||||||
)
|
)
|
||||||
self.assertTrue(mx.allclose(out[0], mx.exp(a)))
|
self.assertTrue(mx.allclose(out[0], mx.exp(a)))
|
||||||
|
|
||||||
|
@unittest.skipIf(not mx.metal.is_available(), "Metal is not available")
|
||||||
|
def test_custom_kernel_attributes(self):
|
||||||
|
a = mx.zeros(shape=(1, 1))
|
||||||
|
kernel = mx.fast.metal_kernel(
|
||||||
|
name="test_fun",
|
||||||
|
input_names=["a"],
|
||||||
|
output_names=["out"],
|
||||||
|
source="""
|
||||||
|
out[0] = threads_per_threadgroup.x;
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
out = kernel(
|
||||||
|
inputs=[a],
|
||||||
|
grid=(2, 1, 1),
|
||||||
|
threadgroup=(2, 1, 1),
|
||||||
|
output_shapes=[(1, 1)],
|
||||||
|
output_dtypes=[mx.uint32],
|
||||||
|
stream=mx.gpu,
|
||||||
|
)[0]
|
||||||
|
self.assertEqual(out.item(), 2)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Loading…
Reference in New Issue
Block a user