Add quantize/dequantize for mxfp8 and nvfp4 (#2688)

* Add quantize/dequantize slow path for mxfp8 and nvfp4 * fast cuda kernel for mx/nv quantization * fallback for cuda < 12.8 (#2697) * format (#2700) * fix (#2701) * metal kernels * docs * fix jit * add default bits and group sizes * improve quant docs * fix output type of mxfp4 matmuls
2025-12-16 01:49:05 +08:00 · 2025-10-28 16:23:12 -07:00
parent 460691a0e8
commit ec72b44417
25 changed files with 1400 additions and 588 deletions
--- a/mlx/backend/cuda/CMakeLists.txt
+++ b/mlx/backend/cuda/CMakeLists.txt
@@ -51,6 +51,7 @@ target_sources(
          ${CMAKE_CURRENT_SOURCE_DIR}/ternary.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/utils.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/quantized/affine_quantize.cu
+          ${CMAKE_CURRENT_SOURCE_DIR}/quantized/fp_quantize.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/quantized/quantized.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/quantized/convert_fp8.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/worker.cpp)
@@ -58,6 +59,11 @@ target_sources(
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/binary)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/unary)

+# fp4 is not available on < 12.8
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.8.0)
+  target_include_directories(mlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/quantized/)
+endif()
+
 if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9.0)
  target_sources(
    mlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/gemms/cublas_gemm_batched_12_9.cu)