Bug fix in metal binary kernel dispatch for large arrays (#125)

* bug fix

* format
This commit is contained in:
Awni Hannun 2023-12-10 16:12:31 -08:00 committed by GitHub
parent 0cfbfc9904
commit 71d1fff90a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 4 deletions

View File

@@ -84,9 +84,9 @@ void binary_op(
} }
// Launch up to 3D grid of threads // Launch up to 3D grid of threads
int dim0 = ndim > 0 ? shape[ndim - 1] : 1; size_t dim0 = ndim > 0 ? shape[ndim - 1] : 1;
int dim1 = ndim > 1 ? shape[ndim - 2] : 1; size_t dim1 = ndim > 1 ? shape[ndim - 2] : 1;
int rest = out.size() / (dim0 * dim1); size_t rest = out.size() / (dim0 * dim1);
NS::UInteger thread_group_size = kernel->maxTotalThreadsPerThreadgroup(); NS::UInteger thread_group_size = kernel->maxTotalThreadsPerThreadgroup();
if (thread_group_size != 1024) { if (thread_group_size != 1024) {
throw std::runtime_error("[Metal::binary] Must use 1024 sized block"); throw std::runtime_error("[Metal::binary] Must use 1024 sized block");

View File

@@ -22,7 +22,6 @@ def sigmoid(x):
\text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)} \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
""" """
return mx.sigmoid(x) return mx.sigmoid(x)
def relu(x): def relu(x):
@@ -89,10 +88,12 @@ def gelu_fast_approx(x):
""" """
return x * mx.sigmoid(1.773 * x) return x * mx.sigmoid(1.773 * x)
@_make_activation_module @_make_activation_module
class Sigmoid(Module): class Sigmoid(Module):
pass pass
@_make_activation_module(relu) @_make_activation_module(relu)
class ReLU(Module): class ReLU(Module):
pass pass

View File

@@ -1305,6 +1305,11 @@ class TestOps(mlx_tests.MLXTestCase):
d_np = np.take(b_mx, np.arange(kth), axis=axis) d_np = np.take(b_mx, np.arange(kth), axis=axis)
self.assertTrue(np.all(d_np <= c_mx)) self.assertTrue(np.all(d_np <= c_mx))
def test_large_binary(self):
# Check an elementwise add on a very large input: the first operand holds
# 1000 * 2147484 = 2,147,484,000 elements, which is more than a signed
# 32-bit integer can represent (2,147,483,647).
a = mx.ones([1000, 2147484], mx.int8)
# The second operand has only the trailing dimension, so the shapes differ.
b = mx.ones([2147484], mx.int8)
# Both inputs are all ones, so the first element of the sum should be 2.
self.assertEqual((a + b)[0, 0].item(), 2)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()