Rename the `type` keyword argument to `quantization_type` in the quantization ops bindings

This commit is contained in:
Angelos Katharopoulos
2024-12-16 13:31:34 -08:00
parent f5da489a3c
commit 410ccdbed5
4 changed files with 78 additions and 50 deletions

View File

@@ -4025,7 +4025,7 @@ void init_ops(nb::module_& m) {
bool transpose,
int group_size,
int bits,
const std::string& type,
const std::string& quantization_type,
mx::StreamOrDevice s) {
return mx::quantized_matmul(
std::move(x),
@@ -4035,7 +4035,7 @@ void init_ops(nb::module_& m) {
transpose,
group_size,
bits,
mx::from_string(type),
mx::from_string(quantization_type),
s);
},
nb::arg(),
@@ -4045,11 +4045,11 @@ void init_ops(nb::module_& m) {
"transpose"_a = true,
"group_size"_a = 64,
"bits"_a = 4,
"type"_a = "affine",
"quantization_type"_a = "affine",
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
"def quantized_matmul(x: array, w: array, /, scales: array, biases: Optional[array], transpose: bool = True, group_size: int = 64, bits: int = 4, type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
"def quantized_matmul(x: array, w: array, /, scales: array, biases: Optional[array], transpose: bool = True, group_size: int = 64, bits: int = 4, quantization_type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
R"pbdoc(
Perform the matrix multiplication with the quantized matrix ``w``. The
quantization uses one floating point scale and bias per ``group_size`` of
@@ -4069,7 +4069,7 @@ void init_ops(nb::module_& m) {
shares a scale and bias. Default: ``64``.
bits (int, optional): The number of bits occupied by each element in
``w``. Default: ``4``.
type (str, optional): The type of quantization used for the matrix.
quantization_type (str, optional): The type of quantization used for the matrix.
It can be 'affine' or 'affine-packed'.
Returns:
@@ -4080,18 +4080,19 @@ void init_ops(nb::module_& m) {
[](const mx::array& w,
int group_size,
int bits,
const std::string& type,
const std::string& quantization_type,
mx::StreamOrDevice s) {
return mx::quantize(w, group_size, bits, mx::from_string(type), s);
return mx::quantize(
w, group_size, bits, mx::from_string(quantization_type), s);
},
nb::arg(),
"group_size"_a = 64,
"bits"_a = 4,
"type"_a = "affine",
"quantization_type"_a = "affine",
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
"def quantize(w: array, /, group_size: int = 64, bits : int = 4, type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> tuple[array, array, Optional[array]]"),
"def quantize(w: array, /, group_size: int = 64, bits : int = 4, quantization_type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> tuple[array, array, Optional[array]]"),
R"pbdoc(
Quantize the matrix ``w`` using ``bits`` bits per element.
@@ -4133,7 +4134,7 @@ void init_ops(nb::module_& m) {
scale and bias. Default: ``64``.
bits (int, optional): The number of bits occupied by each element of
``w`` in the returned quantized matrix. Default: ``4``.
type (str, optional): The type of quantization used for the matrix.
quantization_type (str, optional): The type of quantization used for the matrix.
It can be 'affine' or 'affine-packed'.
Returns:
@@ -4152,21 +4153,27 @@ void init_ops(nb::module_& m) {
const std::optional<mx::array>& biases,
int group_size,
int bits,
const std::string& type,
const std::string& quantization_type,
mx::StreamOrDevice s) {
return mx::dequantize(
wq, scales, biases, group_size, bits, mx::from_string(type), s);
wq,
scales,
biases,
group_size,
bits,
mx::from_string(quantization_type),
s);
},
nb::arg(),
"scales"_a,
"biases"_a,
"group_size"_a = 64,
"bits"_a = 4,
"type"_a = "affine",
"quantization_type"_a = "affine",
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
"def dequantize(w: array, /, scales: array, biases: Optional[array], group_size: int = 64, bits: int = 4, type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
"def dequantize(w: array, /, scales: array, biases: Optional[array], group_size: int = 64, bits: int = 4, quantization_type: str = 'affine', *, stream: Union[None, Stream, Device] = None) -> array"),
R"pbdoc(
Dequantize the matrix ``w`` using the provided ``scales`` and
``biases`` and the ``group_size`` and ``bits`` configuration.
@@ -4187,7 +4194,7 @@ void init_ops(nb::module_& m) {
scale and bias. Default: ``64``.
bits (int, optional): The number of bits occupied by each element in
``w``. Default: ``4``.
type (str, optional): The type of quantization used for the matrix.
quantization_type (str, optional): The type of quantization used for the matrix.
It can be 'affine' or 'affine-packed'.
Returns:
@@ -4205,7 +4212,7 @@ void init_ops(nb::module_& m) {
"transpose"_a = true,
"group_size"_a = 64,
"bits"_a = 4,
"type"_a = "affine",
"quantization_type"_a = "affine",
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
@@ -4235,7 +4242,7 @@ void init_ops(nb::module_& m) {
shares a scale and bias. Default: ``64``.
bits (int, optional): The number of bits occupied by each element in
``w``. Default: ``4``.
type (str, optional): The type of quantization used for the matrix.
quantization_type (str, optional): The type of quantization used for the matrix.
It can be 'affine' or 'affine-packed'.
Returns: