mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-26 15:58:14 +08:00
fix metal scan (#2591)
This commit is contained in:
@@ -306,6 +306,7 @@ template <
|
||||
U prev_thread = op.simd_exclusive_scan(values[N_READS - 1]);
|
||||
|
||||
// Write simdgroup_sums to SM
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (simd_lane_id == simd_size - 1) {
|
||||
simdgroup_sums[simd_group_id] = op(prev_thread, values[N_READS - 1]);
|
||||
}
|
||||
@@ -440,6 +441,7 @@ template <
|
||||
}
|
||||
|
||||
// Read in SM
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (check_index_y < axis_size && (read_offset_x + N_READS) < stride_limit) {
|
||||
for (int i = 0; i < N_READS; i++) {
|
||||
read_into[i] = in[index_y * stride + i];
|
||||
|
@@ -36,14 +36,6 @@ void Scan::eval_gpu(const std::vector<array>& inputs, array& out) {
|
||||
|
||||
bool contiguous = in.strides()[axis_] == 1;
|
||||
|
||||
std::ostringstream kname;
|
||||
kname << (contiguous ? "contig_" : "strided_");
|
||||
kname << "scan_";
|
||||
if (reverse_) {
|
||||
kname << "reverse_";
|
||||
}
|
||||
kname << ((inclusive_) ? "inclusive_" : "exclusive_");
|
||||
|
||||
std::string reduce_type;
|
||||
switch (reduce_type_) {
|
||||
case Scan::Sum:
|
||||
@@ -62,9 +54,22 @@ void Scan::eval_gpu(const std::vector<array>& inputs, array& out) {
|
||||
reduce_type = "logaddexp";
|
||||
break;
|
||||
}
|
||||
kname << reduce_type << "_" << type_to_name(in) << "_" << type_to_name(out);
|
||||
auto kernel = get_scan_kernel(
|
||||
d, kname.str(), reverse_, inclusive_, reduce_type, in, out);
|
||||
|
||||
std::string kname;
|
||||
concatenate(
|
||||
kname,
|
||||
contiguous ? "contig_" : "strided_",
|
||||
"scan_",
|
||||
reverse_ ? "reverse_" : "",
|
||||
(inclusive_) ? "inclusive_" : "exclusive_",
|
||||
reduce_type,
|
||||
"_",
|
||||
type_to_name(in),
|
||||
"_",
|
||||
type_to_name(out));
|
||||
|
||||
auto kernel =
|
||||
get_scan_kernel(d, kname, reverse_, inclusive_, reduce_type, in, out);
|
||||
|
||||
if (contiguous) {
|
||||
auto& compute_encoder = d.get_command_encoder(s.index);
|
||||
|
Reference in New Issue
Block a user