Fix layernorm race condition (#2340)

This commit is contained in:
Angelos Katharopoulos 2025-07-07 06:06:01 -07:00 committed by GitHub
parent 0e0d9ac522
commit f5299f72cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -31,6 +31,7 @@ inline void threadgroup_sum(
for (int i = 0; i < N; i++) {
x[i] = simd_sum(x[i]);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (simd_lane_id == 0) {
for (int i = 0; i < N; i++) {
xs[N * simd_group_id + i] = x[i];