mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Fix for edge checking bug in matmul
This commit is contained in:
@@ -69,16 +69,16 @@ auto gemm_loop(
|
|||||||
if constexpr (kAlignedM) {
|
if constexpr (kAlignedM) {
|
||||||
Atile.load(A + A_offset, params->lda);
|
Atile.load(A + A_offset, params->lda);
|
||||||
} else {
|
} else {
|
||||||
const short rmax = transpose_a ? UK : sgp_sm;
|
const short rmax = transpose_a ? SK : sgp_sm;
|
||||||
const short cmax = transpose_a ? sgp_sm : UK;
|
const short cmax = transpose_a ? sgp_sm : SK;
|
||||||
Atile.load_safe(A + A_offset, params->lda, short2(cmax, rmax));
|
Atile.load_safe(A + A_offset, params->lda, short2(cmax, rmax));
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr (kAlignedN) {
|
if constexpr (kAlignedN) {
|
||||||
Btile.load(B + B_offset, params->ldb);
|
Btile.load(B + B_offset, params->ldb);
|
||||||
} else {
|
} else {
|
||||||
const short rmax = transpose_b ? sgp_sn : UK;
|
const short rmax = transpose_b ? sgp_sn : SK;
|
||||||
const short cmax = transpose_b ? UK : sgp_sn;
|
const short cmax = transpose_b ? SK : sgp_sn;
|
||||||
Btile.load_safe(B + B_offset, params->ldb, short2(cmax, rmax));
|
Btile.load_safe(B + B_offset, params->ldb, short2(cmax, rmax));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user