Bug fix in Metal binary kernel dispatch for large arrays (#125)

* bug fix

* format
This commit is contained in:
Awni Hannun
2023-12-10 16:12:31 -08:00
committed by GitHub
parent 0cfbfc9904
commit 71d1fff90a
3 changed files with 10 additions and 4 deletions

View File

@@ -84,9 +84,9 @@ void binary_op(
}
// Launch up to 3D grid of threads
int dim0 = ndim > 0 ? shape[ndim - 1] : 1;
int dim1 = ndim > 1 ? shape[ndim - 2] : 1;
int rest = out.size() / (dim0 * dim1);
size_t dim0 = ndim > 0 ? shape[ndim - 1] : 1;
size_t dim1 = ndim > 1 ? shape[ndim - 2] : 1;
size_t rest = out.size() / (dim0 * dim1);
NS::UInteger thread_group_size = kernel->maxTotalThreadsPerThreadgroup();
if (thread_group_size != 1024) {
throw std::runtime_error("[Metal::binary] Must use 1024 sized block");