Lower sorted QMM gather threshold (#2609)

This commit is contained in:
Awni Hannun
2025-09-19 18:22:55 -07:00
committed by GitHub
parent 787c0d90cd
commit ec2ab42888

View File

@@ -948,8 +948,8 @@ void GatherQMM::eval_gpu(const std::vector<array>& inputs, array& out) {
// We are walking x in order and w is also in order so we can batch up the
// matmuls and reuse reading x and w.
//
// TODO: Tune 16 and 8 here a bit better.
if (M == 1 && B >= 16 && right_sorted_ == true && B / E >= 8) {
// TODO: Tune 16 and 4 here a bit better.
if (M == 1 && B >= 16 && right_sorted_ == true && B / E >= 4) {
gather_qmm_rhs(
x,
w,