mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Batched Quantized Matmul + Fast Small QMV (#1503)
* add fast qmv for small dims
* fix test
* batched cpu
* add batched template param
* refactor metal quantized.cpp
This commit is contained in:
@@ -1287,10 +1287,10 @@ array conv_transpose3d(
|
||||
|
||||
/** Quantized matmul multiplies x with a quantized matrix w*/
|
||||
array quantized_matmul(
|
||||
const array& x,
|
||||
const array& w,
|
||||
const array& scales,
|
||||
const array& biases,
|
||||
array x,
|
||||
array w,
|
||||
array scales,
|
||||
array biases,
|
||||
bool transpose = true,
|
||||
int group_size = 64,
|
||||
int bits = 4,
|
||||
|
||||
Reference in New Issue
Block a user