mirror of
https://github.com/ml-explore/mlx.git
synced 2025-11-06 03:58:12 +08:00
Change the argument name to quantization_type
This commit is contained in:
@@ -4025,7 +4025,7 @@ void init_ops(nb::module_& m) {
|
||||
bool transpose,
|
||||
int group_size,
|
||||
int bits,
|
||||
const std::string& type,
|
||||
const std::string& quantization_type,
|
||||
mx::StreamOrDevice s) {
|
||||
return mx::quantized_matmul(
|
||||
std::move(x),
|
||||
@@ -4035,7 +4035,7 @@ void init_ops(nb::module_& m) {
|
||||
transpose,
|
||||
group_size,
|
||||
bits,
|
||||
mx::from_string(type),
|
||||
mx::from_string(quantization_type),
|
||||
s);
|
||||
},
|
||||
nb::arg(),
|
||||
@@ -4045,11 +4045,11 @@ void init_ops(nb::module_& m) {
|
||||
"transpose"_a = true,
|
||||
"group_size"_a = 64,
|
||||
"bits"_a = 4,
|
||||
"type"_a = "affine",
|
||||
"quantization_type"_a = "affine",
|
||||
nb::kw_only(),
|
||||
"stream"_a = nb::none(),
|
||||
nb::sig(
|
||||
"def quantized_matmul(x: array, w: array, /, scales: array, biases: Optional[array], transpose: bool = True, group_size: int = 64, bits: int = 4, type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
|
||||
"def quantized_matmul(x: array, w: array, /, scales: array, biases: Optional[array], transpose: bool = True, group_size: int = 64, bits: int = 4, quantization_type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
|
||||
R"pbdoc(
|
||||
Perform the matrix multiplication with the quantized matrix ``w``. The
|
||||
quantization uses one floating point scale and bias per ``group_size`` of
|
||||
@@ -4069,7 +4069,7 @@ void init_ops(nb::module_& m) {
|
||||
shares a scale and bias. Default: ``64``.
|
||||
bits (int, optional): The number of bits occupied by each element in
|
||||
``w``. Default: ``4``.
|
||||
type (str, optional): The type of quantization used for the matrix.
|
||||
quantization_type (str, optional): The type of quantization used for the matrix.
|
||||
It can be 'affine' or 'affine-packed'.
|
||||
|
||||
Returns:
|
||||
@@ -4080,18 +4080,19 @@ void init_ops(nb::module_& m) {
|
||||
[](const mx::array& w,
|
||||
int group_size,
|
||||
int bits,
|
||||
const std::string& type,
|
||||
const std::string& quantization_type,
|
||||
mx::StreamOrDevice s) {
|
||||
return mx::quantize(w, group_size, bits, mx::from_string(type), s);
|
||||
return mx::quantize(
|
||||
w, group_size, bits, mx::from_string(quantization_type), s);
|
||||
},
|
||||
nb::arg(),
|
||||
"group_size"_a = 64,
|
||||
"bits"_a = 4,
|
||||
"type"_a = "affine",
|
||||
"quantization_type"_a = "affine",
|
||||
nb::kw_only(),
|
||||
"stream"_a = nb::none(),
|
||||
nb::sig(
|
||||
"def quantize(w: array, /, group_size: int = 64, bits : int = 4, type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> tuple[array, array, Optional[array]]"),
|
||||
"def quantize(w: array, /, group_size: int = 64, bits : int = 4, quantization_type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> tuple[array, array, Optional[array]]"),
|
||||
R"pbdoc(
|
||||
Quantize the matrix ``w`` using ``bits`` bits per element.
|
||||
|
||||
@@ -4133,7 +4134,7 @@ void init_ops(nb::module_& m) {
|
||||
scale and bias. Default: ``64``.
|
||||
bits (int, optional): The number of bits occupied by each element of
|
||||
``w`` in the returned quantized matrix. Default: ``4``.
|
||||
type (str, optional): The type of quantization used for the matrix.
|
||||
quantization_type (str, optional): The type of quantization used for the matrix.
|
||||
It can be 'affine' or 'affine-packed'.
|
||||
|
||||
Returns:
|
||||
@@ -4152,21 +4153,27 @@ void init_ops(nb::module_& m) {
|
||||
const std::optional<mx::array>& biases,
|
||||
int group_size,
|
||||
int bits,
|
||||
const std::string& type,
|
||||
const std::string& quantization_type,
|
||||
mx::StreamOrDevice s) {
|
||||
return mx::dequantize(
|
||||
wq, scales, biases, group_size, bits, mx::from_string(type), s);
|
||||
wq,
|
||||
scales,
|
||||
biases,
|
||||
group_size,
|
||||
bits,
|
||||
mx::from_string(quantization_type),
|
||||
s);
|
||||
},
|
||||
nb::arg(),
|
||||
"scales"_a,
|
||||
"biases"_a,
|
||||
"group_size"_a = 64,
|
||||
"bits"_a = 4,
|
||||
"type"_a = "affine",
|
||||
"quantization_type"_a = "affine",
|
||||
nb::kw_only(),
|
||||
"stream"_a = nb::none(),
|
||||
nb::sig(
|
||||
"def dequantize(w: array, /, scales: array, biases: Optional[array], group_size: int = 64, bits: int = 4, type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
|
||||
"def dequantize(w: array, /, scales: array, biases: Optional[array], group_size: int = 64, bits: int = 4, quantization_type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
|
||||
R"pbdoc(
|
||||
Dequantize the matrix ``w`` using the provided ``scales`` and
|
||||
``biases`` and the ``group_size`` and ``bits`` configuration.
|
||||
@@ -4187,7 +4194,7 @@ void init_ops(nb::module_& m) {
|
||||
scale and bias. Default: ``64``.
|
||||
bits (int, optional): The number of bits occupied by each element in
|
||||
``w``. Default: ``4``.
|
||||
type (str, optional): The type of quantization used for the matrix.
|
||||
quantization_type (str, optional): The type of quantization used for the matrix.
|
||||
It can be 'affine' or 'affine-packed'.
|
||||
|
||||
Returns:
|
||||
@@ -4205,7 +4212,7 @@ void init_ops(nb::module_& m) {
|
||||
"transpose"_a = true,
|
||||
"group_size"_a = 64,
|
||||
"bits"_a = 4,
|
||||
"type"_a = "affine",
|
||||
"quantization_type"_a = "affine",
|
||||
nb::kw_only(),
|
||||
"stream"_a = nb::none(),
|
||||
nb::sig(
|
||||
@@ -4235,7 +4242,7 @@ void init_ops(nb::module_& m) {
|
||||
shares a scale and bias. Default: ``64``.
|
||||
bits (int, optional): The number of bits occupied by each element in
|
||||
``w``. Default: ``4``.
|
||||
type (str, optional): The type of quantization used for the matrix.
|
||||
quantization_type (str, optional): The type of quantization used for the matrix.
|
||||
It can be 'affine' or 'affine-packed'.
|
||||
|
||||
Returns:
|
||||
|
||||
Reference in New Issue
Block a user