mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-26 15:58:14 +08:00
Lower sorted QMM gather threshold (#2609)
This commit is contained in:
@@ -948,8 +948,8 @@ void GatherQMM::eval_gpu(const std::vector<array>& inputs, array& out) {
|
||||
// We are walking x in order and w is also in order so we can batch up the
|
||||
// matmuls and reuse reading x and w.
|
||||
//
|
||||
- // TODO: Tune 16 and 8 here a bit better.
|
||||
- if (M == 1 && B >= 16 && right_sorted_ == true && B / E >= 8) {
|
||||
+ // TODO: Tune 16 and 4 here a bit better.
|
||||
+ if (M == 1 && B >= 16 && right_sorted_ == true && B / E >= 4) {
|
||||
gather_qmm_rhs(
|
||||
x,
|
||||
w,
|
||||
|
Reference in New Issue
Block a user